1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallBitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Target/TargetLowering.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include "llvm/Target/TargetRegisterInfo.h"
40 #include "llvm/Target/TargetSubtargetInfo.h"
41 #include <algorithm>
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "dagcombine"
45 
46 STATISTIC(NodesCombined   , "Number of dag nodes combined");
47 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
48 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
49 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
50 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
51 STATISTIC(SlicedLoads, "Number of load sliced");
52 
53 namespace {
54   static cl::opt<bool>
55     CombinerAA("combiner-alias-analysis", cl::Hidden,
56                cl::desc("Enable DAG combiner alias-analysis heuristics"));
57 
58   static cl::opt<bool>
59     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
60                cl::desc("Enable DAG combiner's use of IR alias analysis"));
61 
62   static cl::opt<bool>
63     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
64                cl::desc("Enable DAG combiner's use of TBAA"));
65 
66 #ifndef NDEBUG
67   static cl::opt<std::string>
68     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
69                cl::desc("Only use DAG-combiner alias analysis in this"
70                         " function"));
71 #endif
72 
73   /// Hidden option to stress test load slicing, i.e., when this option
74   /// is enabled, load slicing bypasses most of its profitability guards.
75   static cl::opt<bool>
76   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
77                     cl::desc("Bypass the profitability model of load "
78                              "slicing"),
79                     cl::init(false));
80 
81   static cl::opt<bool>
82     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
83                       cl::desc("DAG combiner may split indexing from loads"));
84 
85 //------------------------------ DAGCombiner ---------------------------------//
86 
87   class DAGCombiner {
88     SelectionDAG &DAG;
89     const TargetLowering &TLI;
90     CombineLevel Level;
91     CodeGenOpt::Level OptLevel;
92     bool LegalOperations;
93     bool LegalTypes;
94     bool ForCodeSize;
95 
96     /// \brief Worklist of all of the nodes that need to be simplified.
97     ///
98     /// This must behave as a stack -- new nodes to process are pushed onto the
99     /// back and when processing we pop off of the back.
100     ///
101     /// The worklist will not contain duplicates but may contain null entries
102     /// due to nodes being deleted from the underlying DAG.
103     SmallVector<SDNode *, 64> Worklist;
104 
105     /// \brief Mapping from an SDNode to its position on the worklist.
106     ///
107     /// This is used to find and remove nodes from the worklist (by nulling
108     /// them) when they are deleted from the underlying DAG. It relies on
109     /// stable indices of nodes within the worklist.
110     DenseMap<SDNode *, unsigned> WorklistMap;
111 
112     /// \brief Set of nodes which have been combined (at least once).
113     ///
114     /// This is used to allow us to reliably add any operands of a DAG node
115     /// which have not yet been combined to the worklist.
116     SmallPtrSet<SDNode *, 32> CombinedNodes;
117 
118     // AA - Used for DAG load/store alias analysis.
119     AliasAnalysis &AA;
120 
121     /// When an instruction is simplified, add all users of the instruction to
122     /// the work lists because they might get more simplified now.
123     void AddUsersToWorklist(SDNode *N) {
124       for (SDNode *Node : N->uses())
125         AddToWorklist(Node);
126     }
127 
128     /// Call the node-specific routine that folds each particular type of node.
129     SDValue visit(SDNode *N);
130 
131   public:
132     /// Add to the worklist making sure its instance is at the back (next to be
133     /// processed.)
134     void AddToWorklist(SDNode *N) {
135       // Skip handle nodes as they can't usefully be combined and confuse the
136       // zero-use deletion strategy.
137       if (N->getOpcode() == ISD::HANDLENODE)
138         return;
139 
140       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
141         Worklist.push_back(N);
142     }
143 
144     /// Remove all instances of N from the worklist.
145     void removeFromWorklist(SDNode *N) {
146       CombinedNodes.erase(N);
147 
148       auto It = WorklistMap.find(N);
149       if (It == WorklistMap.end())
150         return; // Not in the worklist.
151 
152       // Null out the entry rather than erasing it to avoid a linear operation.
153       Worklist[It->second] = nullptr;
154       WorklistMap.erase(It);
155     }
156 
157     void deleteAndRecombine(SDNode *N);
158     bool recursivelyDeleteUnusedNodes(SDNode *N);
159 
160     /// Replaces all uses of the results of one DAG node with new values.
161     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
162                       bool AddTo = true);
163 
164     /// Replaces all uses of the results of one DAG node with new values.
165     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
166       return CombineTo(N, &Res, 1, AddTo);
167     }
168 
169     /// Replaces all uses of the results of one DAG node with new values.
170     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
171                       bool AddTo = true) {
172       SDValue To[] = { Res0, Res1 };
173       return CombineTo(N, To, 2, AddTo);
174     }
175 
176     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
177 
178   private:
179 
180     /// Check the specified integer node value to see if it can be simplified or
181     /// if things it uses can be simplified by bit propagation.
182     /// If so, return true.
183     bool SimplifyDemandedBits(SDValue Op) {
184       unsigned BitWidth = Op.getScalarValueSizeInBits();
185       APInt Demanded = APInt::getAllOnesValue(BitWidth);
186       return SimplifyDemandedBits(Op, Demanded);
187     }
188 
189     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
190 
191     bool CombineToPreIndexedLoadStore(SDNode *N);
192     bool CombineToPostIndexedLoadStore(SDNode *N);
193     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
194     bool SliceUpLoad(SDNode *N);
195 
196     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
197     ///   load.
198     ///
199     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
200     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
201     /// \param EltNo index of the vector element to load.
202     /// \param OriginalLoad load that EVE came from to be replaced.
203     /// \returns EVE on success SDValue() on failure.
204     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
205         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
206     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
207     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
208     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
209     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
210     SDValue PromoteIntBinOp(SDValue Op);
211     SDValue PromoteIntShiftOp(SDValue Op);
212     SDValue PromoteExtend(SDValue Op);
213     bool PromoteLoad(SDValue Op);
214 
215     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
216                          SDValue ExtLoad, const SDLoc &DL,
217                          ISD::NodeType ExtType);
218 
219     /// Call the node-specific routine that knows how to fold each
220     /// particular type of node. If that doesn't do anything, try the
221     /// target-specific DAG combines.
222     SDValue combine(SDNode *N);
223 
224     // Visitation implementation - Implement dag node combining for different
225     // node types.  The semantics are as follows:
226     // Return Value:
227     //   SDValue.getNode() == 0 - No change was made
228     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
229     //   otherwise              - N should be replaced by the returned Operand.
230     //
231     SDValue visitTokenFactor(SDNode *N);
232     SDValue visitMERGE_VALUES(SDNode *N);
233     SDValue visitADD(SDNode *N);
234     SDValue visitSUB(SDNode *N);
235     SDValue visitADDC(SDNode *N);
236     SDValue visitSUBC(SDNode *N);
237     SDValue visitADDE(SDNode *N);
238     SDValue visitSUBE(SDNode *N);
239     SDValue visitMUL(SDNode *N);
240     SDValue useDivRem(SDNode *N);
241     SDValue visitSDIV(SDNode *N);
242     SDValue visitUDIV(SDNode *N);
243     SDValue visitREM(SDNode *N);
244     SDValue visitMULHU(SDNode *N);
245     SDValue visitMULHS(SDNode *N);
246     SDValue visitSMUL_LOHI(SDNode *N);
247     SDValue visitUMUL_LOHI(SDNode *N);
248     SDValue visitSMULO(SDNode *N);
249     SDValue visitUMULO(SDNode *N);
250     SDValue visitIMINMAX(SDNode *N);
251     SDValue visitAND(SDNode *N);
252     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
253     SDValue visitOR(SDNode *N);
254     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
255     SDValue visitXOR(SDNode *N);
256     SDValue SimplifyVBinOp(SDNode *N);
257     SDValue visitSHL(SDNode *N);
258     SDValue visitSRA(SDNode *N);
259     SDValue visitSRL(SDNode *N);
260     SDValue visitRotate(SDNode *N);
261     SDValue visitBSWAP(SDNode *N);
262     SDValue visitBITREVERSE(SDNode *N);
263     SDValue visitCTLZ(SDNode *N);
264     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
265     SDValue visitCTTZ(SDNode *N);
266     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
267     SDValue visitCTPOP(SDNode *N);
268     SDValue visitSELECT(SDNode *N);
269     SDValue visitVSELECT(SDNode *N);
270     SDValue visitSELECT_CC(SDNode *N);
271     SDValue visitSETCC(SDNode *N);
272     SDValue visitSETCCE(SDNode *N);
273     SDValue visitSIGN_EXTEND(SDNode *N);
274     SDValue visitZERO_EXTEND(SDNode *N);
275     SDValue visitANY_EXTEND(SDNode *N);
276     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
277     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
278     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
279     SDValue visitTRUNCATE(SDNode *N);
280     SDValue visitBITCAST(SDNode *N);
281     SDValue visitBUILD_PAIR(SDNode *N);
282     SDValue visitFADD(SDNode *N);
283     SDValue visitFSUB(SDNode *N);
284     SDValue visitFMUL(SDNode *N);
285     SDValue visitFMA(SDNode *N);
286     SDValue visitFDIV(SDNode *N);
287     SDValue visitFREM(SDNode *N);
288     SDValue visitFSQRT(SDNode *N);
289     SDValue visitFCOPYSIGN(SDNode *N);
290     SDValue visitSINT_TO_FP(SDNode *N);
291     SDValue visitUINT_TO_FP(SDNode *N);
292     SDValue visitFP_TO_SINT(SDNode *N);
293     SDValue visitFP_TO_UINT(SDNode *N);
294     SDValue visitFP_ROUND(SDNode *N);
295     SDValue visitFP_ROUND_INREG(SDNode *N);
296     SDValue visitFP_EXTEND(SDNode *N);
297     SDValue visitFNEG(SDNode *N);
298     SDValue visitFABS(SDNode *N);
299     SDValue visitFCEIL(SDNode *N);
300     SDValue visitFTRUNC(SDNode *N);
301     SDValue visitFFLOOR(SDNode *N);
302     SDValue visitFMINNUM(SDNode *N);
303     SDValue visitFMAXNUM(SDNode *N);
304     SDValue visitBRCOND(SDNode *N);
305     SDValue visitBR_CC(SDNode *N);
306     SDValue visitLOAD(SDNode *N);
307 
308     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
309     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
310 
311     SDValue visitSTORE(SDNode *N);
312     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
313     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
314     SDValue visitBUILD_VECTOR(SDNode *N);
315     SDValue visitCONCAT_VECTORS(SDNode *N);
316     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
317     SDValue visitVECTOR_SHUFFLE(SDNode *N);
318     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
319     SDValue visitINSERT_SUBVECTOR(SDNode *N);
320     SDValue visitMLOAD(SDNode *N);
321     SDValue visitMSTORE(SDNode *N);
322     SDValue visitMGATHER(SDNode *N);
323     SDValue visitMSCATTER(SDNode *N);
324     SDValue visitFP_TO_FP16(SDNode *N);
325     SDValue visitFP16_TO_FP(SDNode *N);
326 
327     SDValue visitFADDForFMACombine(SDNode *N);
328     SDValue visitFSUBForFMACombine(SDNode *N);
329     SDValue visitFMULForFMACombine(SDNode *N);
330 
331     SDValue XformToShuffleWithZero(SDNode *N);
332     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
333                            SDValue RHS);
334 
335     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
336 
337     SDValue foldSelectOfConstants(SDNode *N);
338     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
339     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
340     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
341     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
342                              SDValue N2, SDValue N3, ISD::CondCode CC,
343                              bool NotExtCompare = false);
344     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
345                           const SDLoc &DL, bool foldBooleans = true);
346 
347     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
348                            SDValue &CC) const;
349     bool isOneUseSetCC(SDValue N) const;
350 
351     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
352                                          unsigned HiOp);
353     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
354     SDValue CombineExtLoad(SDNode *N);
355     SDValue combineRepeatedFPDivisors(SDNode *N);
356     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
357     SDValue BuildSDIV(SDNode *N);
358     SDValue BuildSDIVPow2(SDNode *N);
359     SDValue BuildUDIV(SDNode *N);
360     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
361     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
362     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
363     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
364     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
365                                 SDNodeFlags *Flags, bool Reciprocal);
366     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
367                                 SDNodeFlags *Flags, bool Reciprocal);
368     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
369                                bool DemandHighBits = true);
370     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
371     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
372                               SDValue InnerPos, SDValue InnerNeg,
373                               unsigned PosOpcode, unsigned NegOpcode,
374                               const SDLoc &DL);
375     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
376     SDValue ReduceLoadWidth(SDNode *N);
377     SDValue ReduceLoadOpStoreWidth(SDNode *N);
378     SDValue splitMergedValStore(StoreSDNode *ST);
379     SDValue TransformFPLoadStorePair(SDNode *N);
380     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
381     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
382     SDValue reduceBuildVecToShuffle(SDNode *N);
383     SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
384                                   SDValue VecIn1, SDValue VecIn2,
385                                   unsigned LeftIdx);
386 
387     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
388 
389     /// Walk up chain skipping non-aliasing memory nodes,
390     /// looking for aliasing nodes and adding them to the Aliases vector.
391     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
392                           SmallVectorImpl<SDValue> &Aliases);
393 
394     /// Return true if there is any possibility that the two addresses overlap.
395     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
396 
397     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
398     /// chain (aliasing node.)
399     SDValue FindBetterChain(SDNode *N, SDValue Chain);
400 
401     /// Try to replace a store and any possibly adjacent stores on
402     /// consecutive chains with better chains. Return true only if St is
403     /// replaced.
404     ///
405     /// Notice that other chains may still be replaced even if the function
406     /// returns false.
407     bool findBetterNeighborChains(StoreSDNode *St);
408 
409     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
410     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
411 
412     /// Holds a pointer to an LSBaseSDNode as well as information on where it
413     /// is located in a sequence of memory operations connected by a chain.
414     struct MemOpLink {
415       MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
416       MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
417       // Ptr to the mem node.
418       LSBaseSDNode *MemNode;
419       // Offset from the base ptr.
420       int64_t OffsetFromBase;
421       // What is the sequence number of this mem node.
422       // Lowest mem operand in the DAG starts at zero.
423       unsigned SequenceNum;
424     };
425 
426     /// This is a helper function for visitMUL to check the profitability
427     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
428     /// MulNode is the original multiply, AddNode is (add x, c1),
429     /// and ConstNode is c2.
430     bool isMulAddWithConstProfitable(SDNode *MulNode,
431                                      SDValue &AddNode,
432                                      SDValue &ConstNode);
433 
434     /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
435     /// constant build_vector of the stored constant values in Stores.
436     SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
437                                          ArrayRef<MemOpLink> Stores,
438                                          SmallVectorImpl<SDValue> &Chains,
439                                          EVT Ty) const;
440 
441     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
442     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
443     /// the type of the loaded value to be extended.  LoadedVT returns the type
444     /// of the original loaded value.  NarrowLoad returns whether the load would
445     /// need to be narrowed in order to match.
446     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
447                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
448                           bool &NarrowLoad);
449 
450     /// This is a helper function for MergeConsecutiveStores. When the source
451     /// elements of the consecutive stores are all constants or all extracted
452     /// vector elements, try to merge them into one larger store.
453     /// \return True if a merged store was created.
454     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
455                                          EVT MemVT, unsigned NumStores,
456                                          bool IsConstantSrc, bool UseVector);
457 
458     /// This is a helper function for MergeConsecutiveStores.
459     /// Stores that may be merged are placed in StoreNodes.
460     /// Loads that may alias with those stores are placed in AliasLoadNodes.
461     void getStoreMergeAndAliasCandidates(
462         StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
463         SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
464 
465     /// Helper function for MergeConsecutiveStores. Checks if
466     /// Candidate stores have indirect dependency through their
467     /// operands. \return True if safe to merge
468     bool checkMergeStoreCandidatesForDependencies(
469         SmallVectorImpl<MemOpLink> &StoreNodes);
470 
471     /// Merge consecutive store operations into a wide store.
472     /// This optimization uses wide integers or vectors when possible.
473     /// \return True if some memory operations were changed.
474     bool MergeConsecutiveStores(StoreSDNode *N);
475 
476     /// \brief Try to transform a truncation where C is a constant:
477     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
478     ///
479     /// \p N needs to be a truncation and its first operand an AND. Other
480     /// requirements are checked by the function (e.g. that trunc is
481     /// single-use) and if missed an empty SDValue is returned.
482     SDValue distributeTruncateThroughAnd(SDNode *N);
483 
484   public:
485     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
486         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
487           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
488       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
489     }
490 
491     /// Runs the dag combiner on all nodes in the work list
492     void Run(CombineLevel AtLevel);
493 
494     SelectionDAG &getDAG() const { return DAG; }
495 
496     /// Returns a type large enough to hold any valid shift amount - before type
497     /// legalization these can be huge.
498     EVT getShiftAmountTy(EVT LHSTy) {
499       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
500       if (LHSTy.isVector())
501         return LHSTy;
502       auto &DL = DAG.getDataLayout();
503       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
504                         : TLI.getPointerTy(DL);
505     }
506 
507     /// This method returns true if we are running before type legalization or
508     /// if the specified VT is legal.
509     bool isTypeLegal(const EVT &VT) {
510       if (!LegalTypes) return true;
511       return TLI.isTypeLegal(VT);
512     }
513 
514     /// Convenience wrapper around TargetLowering::getSetCCResultType
515     EVT getSetCCResultType(EVT VT) const {
516       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
517     }
518   };
519 }
520 
521 
522 namespace {
523 /// This class is a DAGUpdateListener that removes any deleted
524 /// nodes from the worklist.
525 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
526   DAGCombiner &DC;
527 public:
528   explicit WorklistRemover(DAGCombiner &dc)
529     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
530 
531   void NodeDeleted(SDNode *N, SDNode *E) override {
532     DC.removeFromWorklist(N);
533   }
534 };
535 }
536 
537 //===----------------------------------------------------------------------===//
538 //  TargetLowering::DAGCombinerInfo implementation
539 //===----------------------------------------------------------------------===//
540 
541 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
542   ((DAGCombiner*)DC)->AddToWorklist(N);
543 }
544 
545 SDValue TargetLowering::DAGCombinerInfo::
546 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
547   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
548 }
549 
550 SDValue TargetLowering::DAGCombinerInfo::
551 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
552   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
553 }
554 
555 
556 SDValue TargetLowering::DAGCombinerInfo::
557 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
558   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
559 }
560 
561 void TargetLowering::DAGCombinerInfo::
562 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
563   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
564 }
565 
566 //===----------------------------------------------------------------------===//
567 // Helper Functions
568 //===----------------------------------------------------------------------===//
569 
570 void DAGCombiner::deleteAndRecombine(SDNode *N) {
571   removeFromWorklist(N);
572 
573   // If the operands of this node are only used by the node, they will now be
574   // dead. Make sure to re-visit them and recursively delete dead nodes.
575   for (const SDValue &Op : N->ops())
576     // For an operand generating multiple values, one of the values may
577     // become dead allowing further simplification (e.g. split index
578     // arithmetic from an indexed load).
579     if (Op->hasOneUse() || Op->getNumValues() > 1)
580       AddToWorklist(Op.getNode());
581 
582   DAG.DeleteNode(N);
583 }
584 
585 /// Return 1 if we can compute the negated form of the specified expression for
586 /// the same cost as the expression itself, or 2 if we can compute the negated
587 /// form more cheaply than the expression itself.
588 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
589                                const TargetLowering &TLI,
590                                const TargetOptions *Options,
591                                unsigned Depth = 0) {
592   // fneg is removable even if it has multiple uses.
593   if (Op.getOpcode() == ISD::FNEG) return 2;
594 
595   // Don't allow anything with multiple uses.
596   if (!Op.hasOneUse()) return 0;
597 
598   // Don't recurse exponentially.
599   if (Depth > 6) return 0;
600 
601   switch (Op.getOpcode()) {
602   default: return false;
603   case ISD::ConstantFP:
604     // Don't invert constant FP values after legalize.  The negated constant
605     // isn't necessarily legal.
606     return LegalOperations ? 0 : 1;
607   case ISD::FADD:
608     // FIXME: determine better conditions for this xform.
609     if (!Options->UnsafeFPMath) return 0;
610 
611     // After operation legalization, it might not be legal to create new FSUBs.
612     if (LegalOperations &&
613         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
614       return 0;
615 
616     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
617     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
618                                     Options, Depth + 1))
619       return V;
620     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
621     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
622                               Depth + 1);
623   case ISD::FSUB:
624     // We can't turn -(A-B) into B-A when we honor signed zeros.
625     if (!Options->UnsafeFPMath) return 0;
626 
627     // fold (fneg (fsub A, B)) -> (fsub B, A)
628     return 1;
629 
630   case ISD::FMUL:
631   case ISD::FDIV:
632     if (Options->HonorSignDependentRoundingFPMath()) return 0;
633 
634     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
635     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
636                                     Options, Depth + 1))
637       return V;
638 
639     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
640                               Depth + 1);
641 
642   case ISD::FP_EXTEND:
643   case ISD::FP_ROUND:
644   case ISD::FSIN:
645     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
646                               Depth + 1);
647   }
648 }
649 
650 /// If isNegatibleForFree returns true, return the newly negated expression.
651 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
652                                     bool LegalOperations, unsigned Depth = 0) {
653   const TargetOptions &Options = DAG.getTarget().Options;
654   // fneg is removable even if it has multiple uses.
655   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
656 
657   // Don't allow anything with multiple uses.
658   assert(Op.hasOneUse() && "Unknown reuse!");
659 
660   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
661 
662   const SDNodeFlags *Flags = Op.getNode()->getFlags();
663 
664   switch (Op.getOpcode()) {
665   default: llvm_unreachable("Unknown code");
666   case ISD::ConstantFP: {
667     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
668     V.changeSign();
669     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
670   }
671   case ISD::FADD:
672     // FIXME: determine better conditions for this xform.
673     assert(Options.UnsafeFPMath);
674 
675     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
676     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
677                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
678       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
679                          GetNegatedExpression(Op.getOperand(0), DAG,
680                                               LegalOperations, Depth+1),
681                          Op.getOperand(1), Flags);
682     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
683     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
684                        GetNegatedExpression(Op.getOperand(1), DAG,
685                                             LegalOperations, Depth+1),
686                        Op.getOperand(0), Flags);
687   case ISD::FSUB:
688     // We can't turn -(A-B) into B-A when we honor signed zeros.
689     assert(Options.UnsafeFPMath);
690 
691     // fold (fneg (fsub 0, B)) -> B
692     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
693       if (N0CFP->isZero())
694         return Op.getOperand(1);
695 
696     // fold (fneg (fsub A, B)) -> (fsub B, A)
697     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
698                        Op.getOperand(1), Op.getOperand(0), Flags);
699 
700   case ISD::FMUL:
701   case ISD::FDIV:
702     assert(!Options.HonorSignDependentRoundingFPMath());
703 
704     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
705     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
706                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
707       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
708                          GetNegatedExpression(Op.getOperand(0), DAG,
709                                               LegalOperations, Depth+1),
710                          Op.getOperand(1), Flags);
711 
712     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
713     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
714                        Op.getOperand(0),
715                        GetNegatedExpression(Op.getOperand(1), DAG,
716                                             LegalOperations, Depth+1), Flags);
717 
718   case ISD::FP_EXTEND:
719   case ISD::FSIN:
720     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
721                        GetNegatedExpression(Op.getOperand(0), DAG,
722                                             LegalOperations, Depth+1));
723   case ISD::FP_ROUND:
724       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
725                          GetNegatedExpression(Op.getOperand(0), DAG,
726                                               LegalOperations, Depth+1),
727                          Op.getOperand(1));
728   }
729 }
730 
731 // APInts must be the same size for most operations, this helper
732 // function zero extends the shorter of the pair so that they match.
733 // We provide an Offset so that we can create bitwidths that won't overflow.
734 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
735   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
736   LHS = LHS.zextOrSelf(Bits);
737   RHS = RHS.zextOrSelf(Bits);
738 }
739 
740 // Return true if this node is a setcc, or is a select_cc
741 // that selects between the target values used for true and false, making it
742 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
743 // the appropriate nodes based on the type of node we are checking. This
744 // simplifies life a bit for the callers.
745 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
746                                     SDValue &CC) const {
747   if (N.getOpcode() == ISD::SETCC) {
748     LHS = N.getOperand(0);
749     RHS = N.getOperand(1);
750     CC  = N.getOperand(2);
751     return true;
752   }
753 
754   if (N.getOpcode() != ISD::SELECT_CC ||
755       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
756       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
757     return false;
758 
759   if (TLI.getBooleanContents(N.getValueType()) ==
760       TargetLowering::UndefinedBooleanContent)
761     return false;
762 
763   LHS = N.getOperand(0);
764   RHS = N.getOperand(1);
765   CC  = N.getOperand(4);
766   return true;
767 }
768 
769 /// Return true if this is a SetCC-equivalent operation with only one use.
770 /// If this is true, it allows the users to invert the operation for free when
771 /// it is profitable to do so.
772 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
773   SDValue N0, N1, N2;
774   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
775     return true;
776   return false;
777 }
778 
779 // \brief Returns the SDNode if it is a constant float BuildVector
780 // or constant float.
781 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
782   if (isa<ConstantFPSDNode>(N))
783     return N.getNode();
784   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
785     return N.getNode();
786   return nullptr;
787 }
788 
789 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
790 // int.
791 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
792   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
793     return CN;
794 
795   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
796     BitVector UndefElements;
797     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
798 
799     // BuildVectors can truncate their operands. Ignore that case here.
800     // FIXME: We blindly ignore splats which include undef which is overly
801     // pessimistic.
802     if (CN && UndefElements.none() &&
803         CN->getValueType(0) == N.getValueType().getScalarType())
804       return CN;
805   }
806 
807   return nullptr;
808 }
809 
810 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
811 // float.
812 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
813   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
814     return CN;
815 
816   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
817     BitVector UndefElements;
818     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
819 
820     if (CN && UndefElements.none())
821       return CN;
822   }
823 
824   return nullptr;
825 }
826 
827 // Determines if it is a constant integer or a build vector of constant
828 // integers (and undefs).
829 // Do not permit build vector implicit truncation.
830 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
831   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
832     return !(Const->isOpaque() && NoOpaques);
833   if (N.getOpcode() != ISD::BUILD_VECTOR)
834     return false;
835   unsigned BitWidth = N.getScalarValueSizeInBits();
836   for (const SDValue &Op : N->op_values()) {
837     if (Op.isUndef())
838       continue;
839     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
840     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
841         (Const->isOpaque() && NoOpaques))
842       return false;
843   }
844   return true;
845 }
846 
847 // Determines if it is a constant null integer or a splatted vector of a
848 // constant null integer (with no undefs).
849 // Build vector implicit truncation is not an issue for null values.
850 static bool isNullConstantOrNullSplatConstant(SDValue N) {
851   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
852     return Splat->isNullValue();
853   return false;
854 }
855 
856 // Determines if it is a constant integer of one or a splatted vector of a
857 // constant integer of one (with no undefs).
858 // Do not permit build vector implicit truncation.
859 static bool isOneConstantOrOneSplatConstant(SDValue N) {
860   unsigned BitWidth = N.getScalarValueSizeInBits();
861   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
862     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
863   return false;
864 }
865 
866 // Determines if it is a constant integer of all ones or a splatted vector of a
867 // constant integer of all ones (with no undefs).
868 // Do not permit build vector implicit truncation.
869 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
870   unsigned BitWidth = N.getScalarValueSizeInBits();
871   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
872     return Splat->isAllOnesValue() &&
873            Splat->getAPIntValue().getBitWidth() == BitWidth;
874   return false;
875 }
876 
877 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
878                                     SDValue N1) {
879   EVT VT = N0.getValueType();
880   if (N0.getOpcode() == Opc) {
881     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
882       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
883         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
884         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
885           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
886         return SDValue();
887       }
888       if (N0.hasOneUse()) {
889         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
890         // use
891         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
892         if (!OpNode.getNode())
893           return SDValue();
894         AddToWorklist(OpNode.getNode());
895         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
896       }
897     }
898   }
899 
900   if (N1.getOpcode() == Opc) {
901     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
902       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
903         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
904         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
905           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
906         return SDValue();
907       }
908       if (N1.hasOneUse()) {
909         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
910         // use
911         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
912         if (!OpNode.getNode())
913           return SDValue();
914         AddToWorklist(OpNode.getNode());
915         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
916       }
917     }
918   }
919 
920   return SDValue();
921 }
922 
923 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
924                                bool AddTo) {
925   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
926   ++NodesCombined;
927   DEBUG(dbgs() << "\nReplacing.1 ";
928         N->dump(&DAG);
929         dbgs() << "\nWith: ";
930         To[0].getNode()->dump(&DAG);
931         dbgs() << " and " << NumTo-1 << " other values\n");
932   for (unsigned i = 0, e = NumTo; i != e; ++i)
933     assert((!To[i].getNode() ||
934             N->getValueType(i) == To[i].getValueType()) &&
935            "Cannot combine value to value of different type!");
936 
937   WorklistRemover DeadNodes(*this);
938   DAG.ReplaceAllUsesWith(N, To);
939   if (AddTo) {
940     // Push the new nodes and any users onto the worklist
941     for (unsigned i = 0, e = NumTo; i != e; ++i) {
942       if (To[i].getNode()) {
943         AddToWorklist(To[i].getNode());
944         AddUsersToWorklist(To[i].getNode());
945       }
946     }
947   }
948 
949   // Finally, if the node is now dead, remove it from the graph.  The node
950   // may not be dead if the replacement process recursively simplified to
951   // something else needing this node.
952   if (N->use_empty())
953     deleteAndRecombine(N);
954   return SDValue(N, 0);
955 }
956 
957 void DAGCombiner::
958 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
959   // Replace all uses.  If any nodes become isomorphic to other nodes and
960   // are deleted, make sure to remove them from our worklist.
961   WorklistRemover DeadNodes(*this);
962   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
963 
964   // Push the new node and any (possibly new) users onto the worklist.
965   AddToWorklist(TLO.New.getNode());
966   AddUsersToWorklist(TLO.New.getNode());
967 
968   // Finally, if the node is now dead, remove it from the graph.  The node
969   // may not be dead if the replacement process recursively simplified to
970   // something else needing this node.
971   if (TLO.Old.getNode()->use_empty())
972     deleteAndRecombine(TLO.Old.getNode());
973 }
974 
975 /// Check the specified integer node value to see if it can be simplified or if
976 /// things it uses can be simplified by bit propagation. If so, return true.
977 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
978   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
979   APInt KnownZero, KnownOne;
980   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
981     return false;
982 
983   // Revisit the node.
984   AddToWorklist(Op.getNode());
985 
986   // Replace the old value with the new one.
987   ++NodesCombined;
988   DEBUG(dbgs() << "\nReplacing.2 ";
989         TLO.Old.getNode()->dump(&DAG);
990         dbgs() << "\nWith: ";
991         TLO.New.getNode()->dump(&DAG);
992         dbgs() << '\n');
993 
994   CommitTargetLoweringOpt(TLO);
995   return true;
996 }
997 
998 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
999   SDLoc DL(Load);
1000   EVT VT = Load->getValueType(0);
1001   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1002 
1003   DEBUG(dbgs() << "\nReplacing.9 ";
1004         Load->dump(&DAG);
1005         dbgs() << "\nWith: ";
1006         Trunc.getNode()->dump(&DAG);
1007         dbgs() << '\n');
1008   WorklistRemover DeadNodes(*this);
1009   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1010   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1011   deleteAndRecombine(Load);
1012   AddToWorklist(Trunc.getNode());
1013 }
1014 
1015 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1016   Replace = false;
1017   SDLoc DL(Op);
1018   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1019     LoadSDNode *LD = cast<LoadSDNode>(Op);
1020     EVT MemVT = LD->getMemoryVT();
1021     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1022       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1023                                                        : ISD::EXTLOAD)
1024       : LD->getExtensionType();
1025     Replace = true;
1026     return DAG.getExtLoad(ExtType, DL, PVT,
1027                           LD->getChain(), LD->getBasePtr(),
1028                           MemVT, LD->getMemOperand());
1029   }
1030 
1031   unsigned Opc = Op.getOpcode();
1032   switch (Opc) {
1033   default: break;
1034   case ISD::AssertSext:
1035     return DAG.getNode(ISD::AssertSext, DL, PVT,
1036                        SExtPromoteOperand(Op.getOperand(0), PVT),
1037                        Op.getOperand(1));
1038   case ISD::AssertZext:
1039     return DAG.getNode(ISD::AssertZext, DL, PVT,
1040                        ZExtPromoteOperand(Op.getOperand(0), PVT),
1041                        Op.getOperand(1));
1042   case ISD::Constant: {
1043     unsigned ExtOpc =
1044       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1045     return DAG.getNode(ExtOpc, DL, PVT, Op);
1046   }
1047   }
1048 
1049   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1050     return SDValue();
1051   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1052 }
1053 
1054 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1055   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1056     return SDValue();
1057   EVT OldVT = Op.getValueType();
1058   SDLoc DL(Op);
1059   bool Replace = false;
1060   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1061   if (!NewOp.getNode())
1062     return SDValue();
1063   AddToWorklist(NewOp.getNode());
1064 
1065   if (Replace)
1066     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1067   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1068                      DAG.getValueType(OldVT));
1069 }
1070 
1071 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1072   EVT OldVT = Op.getValueType();
1073   SDLoc DL(Op);
1074   bool Replace = false;
1075   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1076   if (!NewOp.getNode())
1077     return SDValue();
1078   AddToWorklist(NewOp.getNode());
1079 
1080   if (Replace)
1081     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1082   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1083 }
1084 
1085 /// Promote the specified integer binary operation if the target indicates it is
1086 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1087 /// i32 since i16 instructions are longer.
1088 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1089   if (!LegalOperations)
1090     return SDValue();
1091 
1092   EVT VT = Op.getValueType();
1093   if (VT.isVector() || !VT.isInteger())
1094     return SDValue();
1095 
1096   // If operation type is 'undesirable', e.g. i16 on x86, consider
1097   // promoting it.
1098   unsigned Opc = Op.getOpcode();
1099   if (TLI.isTypeDesirableForOp(Opc, VT))
1100     return SDValue();
1101 
1102   EVT PVT = VT;
1103   // Consult target whether it is a good idea to promote this operation and
1104   // what's the right type to promote it to.
1105   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1106     assert(PVT != VT && "Don't know what type to promote to!");
1107 
1108     bool Replace0 = false;
1109     SDValue N0 = Op.getOperand(0);
1110     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1111     if (!NN0.getNode())
1112       return SDValue();
1113 
1114     bool Replace1 = false;
1115     SDValue N1 = Op.getOperand(1);
1116     SDValue NN1;
1117     if (N0 == N1)
1118       NN1 = NN0;
1119     else {
1120       NN1 = PromoteOperand(N1, PVT, Replace1);
1121       if (!NN1.getNode())
1122         return SDValue();
1123     }
1124 
1125     AddToWorklist(NN0.getNode());
1126     if (NN1.getNode())
1127       AddToWorklist(NN1.getNode());
1128 
1129     if (Replace0)
1130       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1131     if (Replace1)
1132       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1133 
1134     DEBUG(dbgs() << "\nPromoting ";
1135           Op.getNode()->dump(&DAG));
1136     SDLoc DL(Op);
1137     return DAG.getNode(ISD::TRUNCATE, DL, VT,
1138                        DAG.getNode(Opc, DL, PVT, NN0, NN1));
1139   }
1140   return SDValue();
1141 }
1142 
1143 /// Promote the specified integer shift operation if the target indicates it is
1144 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1145 /// i32 since i16 instructions are longer.
1146 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1147   if (!LegalOperations)
1148     return SDValue();
1149 
1150   EVT VT = Op.getValueType();
1151   if (VT.isVector() || !VT.isInteger())
1152     return SDValue();
1153 
1154   // If operation type is 'undesirable', e.g. i16 on x86, consider
1155   // promoting it.
1156   unsigned Opc = Op.getOpcode();
1157   if (TLI.isTypeDesirableForOp(Opc, VT))
1158     return SDValue();
1159 
1160   EVT PVT = VT;
1161   // Consult target whether it is a good idea to promote this operation and
1162   // what's the right type to promote it to.
1163   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1164     assert(PVT != VT && "Don't know what type to promote to!");
1165 
1166     bool Replace = false;
1167     SDValue N0 = Op.getOperand(0);
1168     if (Opc == ISD::SRA)
1169       N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
1170     else if (Opc == ISD::SRL)
1171       N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
1172     else
1173       N0 = PromoteOperand(N0, PVT, Replace);
1174     if (!N0.getNode())
1175       return SDValue();
1176 
1177     AddToWorklist(N0.getNode());
1178     if (Replace)
1179       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1180 
1181     DEBUG(dbgs() << "\nPromoting ";
1182           Op.getNode()->dump(&DAG));
1183     SDLoc DL(Op);
1184     return DAG.getNode(ISD::TRUNCATE, DL, VT,
1185                        DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
1186   }
1187   return SDValue();
1188 }
1189 
1190 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1191   if (!LegalOperations)
1192     return SDValue();
1193 
1194   EVT VT = Op.getValueType();
1195   if (VT.isVector() || !VT.isInteger())
1196     return SDValue();
1197 
1198   // If operation type is 'undesirable', e.g. i16 on x86, consider
1199   // promoting it.
1200   unsigned Opc = Op.getOpcode();
1201   if (TLI.isTypeDesirableForOp(Opc, VT))
1202     return SDValue();
1203 
1204   EVT PVT = VT;
1205   // Consult target whether it is a good idea to promote this operation and
1206   // what's the right type to promote it to.
1207   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1208     assert(PVT != VT && "Don't know what type to promote to!");
1209     // fold (aext (aext x)) -> (aext x)
1210     // fold (aext (zext x)) -> (zext x)
1211     // fold (aext (sext x)) -> (sext x)
1212     DEBUG(dbgs() << "\nPromoting ";
1213           Op.getNode()->dump(&DAG));
1214     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1215   }
1216   return SDValue();
1217 }
1218 
1219 bool DAGCombiner::PromoteLoad(SDValue Op) {
1220   if (!LegalOperations)
1221     return false;
1222 
1223   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1224     return false;
1225 
1226   EVT VT = Op.getValueType();
1227   if (VT.isVector() || !VT.isInteger())
1228     return false;
1229 
1230   // If operation type is 'undesirable', e.g. i16 on x86, consider
1231   // promoting it.
1232   unsigned Opc = Op.getOpcode();
1233   if (TLI.isTypeDesirableForOp(Opc, VT))
1234     return false;
1235 
1236   EVT PVT = VT;
1237   // Consult target whether it is a good idea to promote this operation and
1238   // what's the right type to promote it to.
1239   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1240     assert(PVT != VT && "Don't know what type to promote to!");
1241 
1242     SDLoc DL(Op);
1243     SDNode *N = Op.getNode();
1244     LoadSDNode *LD = cast<LoadSDNode>(N);
1245     EVT MemVT = LD->getMemoryVT();
1246     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1247       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1248                                                        : ISD::EXTLOAD)
1249       : LD->getExtensionType();
1250     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1251                                    LD->getChain(), LD->getBasePtr(),
1252                                    MemVT, LD->getMemOperand());
1253     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1254 
1255     DEBUG(dbgs() << "\nPromoting ";
1256           N->dump(&DAG);
1257           dbgs() << "\nTo: ";
1258           Result.getNode()->dump(&DAG);
1259           dbgs() << '\n');
1260     WorklistRemover DeadNodes(*this);
1261     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1262     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1263     deleteAndRecombine(N);
1264     AddToWorklist(Result.getNode());
1265     return true;
1266   }
1267   return false;
1268 }
1269 
1270 /// \brief Recursively delete a node which has no uses and any operands for
1271 /// which it is the only use.
1272 ///
1273 /// Note that this both deletes the nodes and removes them from the worklist.
1274 /// It also adds any nodes who have had a user deleted to the worklist as they
1275 /// may now have only one use and subject to other combines.
1276 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1277   if (!N->use_empty())
1278     return false;
1279 
1280   SmallSetVector<SDNode *, 16> Nodes;
1281   Nodes.insert(N);
1282   do {
1283     N = Nodes.pop_back_val();
1284     if (!N)
1285       continue;
1286 
1287     if (N->use_empty()) {
1288       for (const SDValue &ChildN : N->op_values())
1289         Nodes.insert(ChildN.getNode());
1290 
1291       removeFromWorklist(N);
1292       DAG.DeleteNode(N);
1293     } else {
1294       AddToWorklist(N);
1295     }
1296   } while (!Nodes.empty());
1297   return true;
1298 }
1299 
1300 //===----------------------------------------------------------------------===//
1301 //  Main DAG Combiner implementation
1302 //===----------------------------------------------------------------------===//
1303 
1304 void DAGCombiner::Run(CombineLevel AtLevel) {
1305   // set the instance variables, so that the various visit routines may use it.
1306   Level = AtLevel;
1307   LegalOperations = Level >= AfterLegalizeVectorOps;
1308   LegalTypes = Level >= AfterLegalizeTypes;
1309 
1310   // Add all the dag nodes to the worklist.
1311   for (SDNode &Node : DAG.allnodes())
1312     AddToWorklist(&Node);
1313 
1314   // Create a dummy node (which is not added to allnodes), that adds a reference
1315   // to the root node, preventing it from being deleted, and tracking any
1316   // changes of the root.
1317   HandleSDNode Dummy(DAG.getRoot());
1318 
1319   // While the worklist isn't empty, find a node and try to combine it.
1320   while (!WorklistMap.empty()) {
1321     SDNode *N;
1322     // The Worklist holds the SDNodes in order, but it may contain null entries.
1323     do {
1324       N = Worklist.pop_back_val();
1325     } while (!N);
1326 
1327     bool GoodWorklistEntry = WorklistMap.erase(N);
1328     (void)GoodWorklistEntry;
1329     assert(GoodWorklistEntry &&
1330            "Found a worklist entry without a corresponding map entry!");
1331 
1332     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1333     // N is deleted from the DAG, since they too may now be dead or may have a
1334     // reduced number of uses, allowing other xforms.
1335     if (recursivelyDeleteUnusedNodes(N))
1336       continue;
1337 
1338     WorklistRemover DeadNodes(*this);
1339 
1340     // If this combine is running after legalizing the DAG, re-legalize any
1341     // nodes pulled off the worklist.
1342     if (Level == AfterLegalizeDAG) {
1343       SmallSetVector<SDNode *, 16> UpdatedNodes;
1344       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1345 
1346       for (SDNode *LN : UpdatedNodes) {
1347         AddToWorklist(LN);
1348         AddUsersToWorklist(LN);
1349       }
1350       if (!NIsValid)
1351         continue;
1352     }
1353 
1354     DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1355 
1356     // Add any operands of the new node which have not yet been combined to the
1357     // worklist as well. Because the worklist uniques things already, this
1358     // won't repeatedly process the same operand.
1359     CombinedNodes.insert(N);
1360     for (const SDValue &ChildN : N->op_values())
1361       if (!CombinedNodes.count(ChildN.getNode()))
1362         AddToWorklist(ChildN.getNode());
1363 
1364     SDValue RV = combine(N);
1365 
1366     if (!RV.getNode())
1367       continue;
1368 
1369     ++NodesCombined;
1370 
1371     // If we get back the same node we passed in, rather than a new node or
1372     // zero, we know that the node must have defined multiple values and
1373     // CombineTo was used.  Since CombineTo takes care of the worklist
1374     // mechanics for us, we have no work to do in this case.
1375     if (RV.getNode() == N)
1376       continue;
1377 
1378     assert(N->getOpcode() != ISD::DELETED_NODE &&
1379            RV.getOpcode() != ISD::DELETED_NODE &&
1380            "Node was deleted but visit returned new node!");
1381 
1382     DEBUG(dbgs() << " ... into: ";
1383           RV.getNode()->dump(&DAG));
1384 
1385     if (N->getNumValues() == RV.getNode()->getNumValues())
1386       DAG.ReplaceAllUsesWith(N, RV.getNode());
1387     else {
1388       assert(N->getValueType(0) == RV.getValueType() &&
1389              N->getNumValues() == 1 && "Type mismatch");
1390       SDValue OpV = RV;
1391       DAG.ReplaceAllUsesWith(N, &OpV);
1392     }
1393 
1394     // Push the new node and any users onto the worklist
1395     AddToWorklist(RV.getNode());
1396     AddUsersToWorklist(RV.getNode());
1397 
1398     // Finally, if the node is now dead, remove it from the graph.  The node
1399     // may not be dead if the replacement process recursively simplified to
1400     // something else needing this node. This will also take care of adding any
1401     // operands which have lost a user to the worklist.
1402     recursivelyDeleteUnusedNodes(N);
1403   }
1404 
1405   // If the root changed (e.g. it was a dead load, update the root).
1406   DAG.setRoot(Dummy.getValue());
1407   DAG.RemoveDeadNodes();
1408 }
1409 
1410 SDValue DAGCombiner::visit(SDNode *N) {
1411   switch (N->getOpcode()) {
1412   default: break;
1413   case ISD::TokenFactor:        return visitTokenFactor(N);
1414   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1415   case ISD::ADD:                return visitADD(N);
1416   case ISD::SUB:                return visitSUB(N);
1417   case ISD::ADDC:               return visitADDC(N);
1418   case ISD::SUBC:               return visitSUBC(N);
1419   case ISD::ADDE:               return visitADDE(N);
1420   case ISD::SUBE:               return visitSUBE(N);
1421   case ISD::MUL:                return visitMUL(N);
1422   case ISD::SDIV:               return visitSDIV(N);
1423   case ISD::UDIV:               return visitUDIV(N);
1424   case ISD::SREM:
1425   case ISD::UREM:               return visitREM(N);
1426   case ISD::MULHU:              return visitMULHU(N);
1427   case ISD::MULHS:              return visitMULHS(N);
1428   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1429   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1430   case ISD::SMULO:              return visitSMULO(N);
1431   case ISD::UMULO:              return visitUMULO(N);
1432   case ISD::SMIN:
1433   case ISD::SMAX:
1434   case ISD::UMIN:
1435   case ISD::UMAX:               return visitIMINMAX(N);
1436   case ISD::AND:                return visitAND(N);
1437   case ISD::OR:                 return visitOR(N);
1438   case ISD::XOR:                return visitXOR(N);
1439   case ISD::SHL:                return visitSHL(N);
1440   case ISD::SRA:                return visitSRA(N);
1441   case ISD::SRL:                return visitSRL(N);
1442   case ISD::ROTR:
1443   case ISD::ROTL:               return visitRotate(N);
1444   case ISD::BSWAP:              return visitBSWAP(N);
1445   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1446   case ISD::CTLZ:               return visitCTLZ(N);
1447   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1448   case ISD::CTTZ:               return visitCTTZ(N);
1449   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1450   case ISD::CTPOP:              return visitCTPOP(N);
1451   case ISD::SELECT:             return visitSELECT(N);
1452   case ISD::VSELECT:            return visitVSELECT(N);
1453   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1454   case ISD::SETCC:              return visitSETCC(N);
1455   case ISD::SETCCE:             return visitSETCCE(N);
1456   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1457   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1458   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1459   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1460   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1461   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1462   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1463   case ISD::BITCAST:            return visitBITCAST(N);
1464   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1465   case ISD::FADD:               return visitFADD(N);
1466   case ISD::FSUB:               return visitFSUB(N);
1467   case ISD::FMUL:               return visitFMUL(N);
1468   case ISD::FMA:                return visitFMA(N);
1469   case ISD::FDIV:               return visitFDIV(N);
1470   case ISD::FREM:               return visitFREM(N);
1471   case ISD::FSQRT:              return visitFSQRT(N);
1472   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1473   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1474   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1475   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1476   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1477   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1478   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1479   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1480   case ISD::FNEG:               return visitFNEG(N);
1481   case ISD::FABS:               return visitFABS(N);
1482   case ISD::FFLOOR:             return visitFFLOOR(N);
1483   case ISD::FMINNUM:            return visitFMINNUM(N);
1484   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1485   case ISD::FCEIL:              return visitFCEIL(N);
1486   case ISD::FTRUNC:             return visitFTRUNC(N);
1487   case ISD::BRCOND:             return visitBRCOND(N);
1488   case ISD::BR_CC:              return visitBR_CC(N);
1489   case ISD::LOAD:               return visitLOAD(N);
1490   case ISD::STORE:              return visitSTORE(N);
1491   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1492   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1493   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1494   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1495   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1496   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1497   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1498   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1499   case ISD::MGATHER:            return visitMGATHER(N);
1500   case ISD::MLOAD:              return visitMLOAD(N);
1501   case ISD::MSCATTER:           return visitMSCATTER(N);
1502   case ISD::MSTORE:             return visitMSTORE(N);
1503   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1504   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1505   }
1506   return SDValue();
1507 }
1508 
1509 SDValue DAGCombiner::combine(SDNode *N) {
1510   SDValue RV = visit(N);
1511 
1512   // If nothing happened, try a target-specific DAG combine.
1513   if (!RV.getNode()) {
1514     assert(N->getOpcode() != ISD::DELETED_NODE &&
1515            "Node was deleted but visit returned NULL!");
1516 
1517     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1518         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1519 
1520       // Expose the DAG combiner to the target combiner impls.
1521       TargetLowering::DAGCombinerInfo
1522         DagCombineInfo(DAG, Level, false, this);
1523 
1524       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1525     }
1526   }
1527 
1528   // If nothing happened still, try promoting the operation.
1529   if (!RV.getNode()) {
1530     switch (N->getOpcode()) {
1531     default: break;
1532     case ISD::ADD:
1533     case ISD::SUB:
1534     case ISD::MUL:
1535     case ISD::AND:
1536     case ISD::OR:
1537     case ISD::XOR:
1538       RV = PromoteIntBinOp(SDValue(N, 0));
1539       break;
1540     case ISD::SHL:
1541     case ISD::SRA:
1542     case ISD::SRL:
1543       RV = PromoteIntShiftOp(SDValue(N, 0));
1544       break;
1545     case ISD::SIGN_EXTEND:
1546     case ISD::ZERO_EXTEND:
1547     case ISD::ANY_EXTEND:
1548       RV = PromoteExtend(SDValue(N, 0));
1549       break;
1550     case ISD::LOAD:
1551       if (PromoteLoad(SDValue(N, 0)))
1552         RV = SDValue(N, 0);
1553       break;
1554     }
1555   }
1556 
1557   // If N is a commutative binary node, try commuting it to enable more
1558   // sdisel CSE.
1559   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1560       N->getNumValues() == 1) {
1561     SDValue N0 = N->getOperand(0);
1562     SDValue N1 = N->getOperand(1);
1563 
1564     // Constant operands are canonicalized to RHS.
1565     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1566       SDValue Ops[] = {N1, N0};
1567       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1568                                             N->getFlags());
1569       if (CSENode)
1570         return SDValue(CSENode, 0);
1571     }
1572   }
1573 
1574   return RV;
1575 }
1576 
1577 /// Given a node, return its input chain if it has one, otherwise return a null
1578 /// sd operand.
1579 static SDValue getInputChainForNode(SDNode *N) {
1580   if (unsigned NumOps = N->getNumOperands()) {
1581     if (N->getOperand(0).getValueType() == MVT::Other)
1582       return N->getOperand(0);
1583     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1584       return N->getOperand(NumOps-1);
1585     for (unsigned i = 1; i < NumOps-1; ++i)
1586       if (N->getOperand(i).getValueType() == MVT::Other)
1587         return N->getOperand(i);
1588   }
1589   return SDValue();
1590 }
1591 
1592 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1593   // If N has two operands, where one has an input chain equal to the other,
1594   // the 'other' chain is redundant.
1595   if (N->getNumOperands() == 2) {
1596     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1597       return N->getOperand(0);
1598     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1599       return N->getOperand(1);
1600   }
1601 
1602   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1603   SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
1604   SmallPtrSet<SDNode*, 16> SeenOps;
1605   bool Changed = false;             // If we should replace this token factor.
1606 
1607   // Start out with this token factor.
1608   TFs.push_back(N);
1609 
1610   // Iterate through token factors.  The TFs grows when new token factors are
1611   // encountered.
1612   for (unsigned i = 0; i < TFs.size(); ++i) {
1613     SDNode *TF = TFs[i];
1614 
1615     // Check each of the operands.
1616     for (const SDValue &Op : TF->op_values()) {
1617 
1618       switch (Op.getOpcode()) {
1619       case ISD::EntryToken:
1620         // Entry tokens don't need to be added to the list. They are
1621         // redundant.
1622         Changed = true;
1623         break;
1624 
1625       case ISD::TokenFactor:
1626         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1627           // Queue up for processing.
1628           TFs.push_back(Op.getNode());
1629           // Clean up in case the token factor is removed.
1630           AddToWorklist(Op.getNode());
1631           Changed = true;
1632           break;
1633         }
1634         LLVM_FALLTHROUGH;
1635 
1636       default:
1637         // Only add if it isn't already in the list.
1638         if (SeenOps.insert(Op.getNode()).second)
1639           Ops.push_back(Op);
1640         else
1641           Changed = true;
1642         break;
1643       }
1644     }
1645   }
1646 
1647   SDValue Result;
1648 
1649   // If we've changed things around then replace token factor.
1650   if (Changed) {
1651     if (Ops.empty()) {
1652       // The entry token is the only possible outcome.
1653       Result = DAG.getEntryNode();
1654     } else {
1655       // New and improved token factor.
1656       Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1657     }
1658 
1659     // Add users to worklist if AA is enabled, since it may introduce
1660     // a lot of new chained token factors while removing memory deps.
1661     bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
1662       : DAG.getSubtarget().useAA();
1663     return CombineTo(N, Result, UseAA /*add to worklist*/);
1664   }
1665 
1666   return Result;
1667 }
1668 
1669 /// MERGE_VALUES can always be eliminated.
1670 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1671   WorklistRemover DeadNodes(*this);
1672   // Replacing results may cause a different MERGE_VALUES to suddenly
1673   // be CSE'd with N, and carry its uses with it. Iterate until no
1674   // uses remain, to ensure that the node can be safely deleted.
1675   // First add the users of this node to the work list so that they
1676   // can be tried again once they have new operands.
1677   AddUsersToWorklist(N);
1678   do {
1679     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1680       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1681   } while (!N->use_empty());
1682   deleteAndRecombine(N);
1683   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1684 }
1685 
1686 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1687 /// ConstantSDNode pointer else nullptr.
1688 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1689   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1690   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1691 }
1692 
1693 SDValue DAGCombiner::visitADD(SDNode *N) {
1694   SDValue N0 = N->getOperand(0);
1695   SDValue N1 = N->getOperand(1);
1696   EVT VT = N0.getValueType();
1697   SDLoc DL(N);
1698 
1699   // fold vector ops
1700   if (VT.isVector()) {
1701     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1702       return FoldedVOp;
1703 
1704     // fold (add x, 0) -> x, vector edition
1705     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1706       return N0;
1707     if (ISD::isBuildVectorAllZeros(N0.getNode()))
1708       return N1;
1709   }
1710 
1711   // fold (add x, undef) -> undef
1712   if (N0.isUndef())
1713     return N0;
1714 
1715   if (N1.isUndef())
1716     return N1;
1717 
1718   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1719     // canonicalize constant to RHS
1720     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1721       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1722     // fold (add c1, c2) -> c1+c2
1723     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1724                                       N1.getNode());
1725   }
1726 
1727   // fold (add x, 0) -> x
1728   if (isNullConstant(N1))
1729     return N0;
1730 
1731   // fold ((c1-A)+c2) -> (c1+c2)-A
1732   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1733     if (N0.getOpcode() == ISD::SUB)
1734       if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1735         return DAG.getNode(ISD::SUB, DL, VT,
1736                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1737                            N0.getOperand(1));
1738       }
1739   }
1740 
1741   // reassociate add
1742   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1743     return RADD;
1744 
1745   // fold ((0-A) + B) -> B-A
1746   if (N0.getOpcode() == ISD::SUB &&
1747       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
1748     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
1749 
1750   // fold (A + (0-B)) -> A-B
1751   if (N1.getOpcode() == ISD::SUB &&
1752       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
1753     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
1754 
1755   // fold (A+(B-A)) -> B
1756   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1757     return N1.getOperand(0);
1758 
1759   // fold ((B-A)+A) -> B
1760   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1761     return N0.getOperand(0);
1762 
1763   // fold (A+(B-(A+C))) to (B-C)
1764   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1765       N0 == N1.getOperand(1).getOperand(0))
1766     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1767                        N1.getOperand(1).getOperand(1));
1768 
1769   // fold (A+(B-(C+A))) to (B-C)
1770   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1771       N0 == N1.getOperand(1).getOperand(1))
1772     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1773                        N1.getOperand(1).getOperand(0));
1774 
1775   // fold (A+((B-A)+or-C)) to (B+or-C)
1776   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1777       N1.getOperand(0).getOpcode() == ISD::SUB &&
1778       N0 == N1.getOperand(0).getOperand(1))
1779     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
1780                        N1.getOperand(1));
1781 
1782   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1783   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1784     SDValue N00 = N0.getOperand(0);
1785     SDValue N01 = N0.getOperand(1);
1786     SDValue N10 = N1.getOperand(0);
1787     SDValue N11 = N1.getOperand(1);
1788 
1789     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
1790       return DAG.getNode(ISD::SUB, DL, VT,
1791                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1792                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1793   }
1794 
1795   if (SimplifyDemandedBits(SDValue(N, 0)))
1796     return SDValue(N, 0);
1797 
1798   // fold (a+b) -> (a|b) iff a and b share no bits.
1799   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
1800       VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
1801     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
1802 
1803   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1804   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
1805       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
1806     return DAG.getNode(ISD::SUB, DL, VT, N0,
1807                        DAG.getNode(ISD::SHL, DL, VT,
1808                                    N1.getOperand(0).getOperand(1),
1809                                    N1.getOperand(1)));
1810   if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
1811       isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
1812     return DAG.getNode(ISD::SUB, DL, VT, N1,
1813                        DAG.getNode(ISD::SHL, DL, VT,
1814                                    N0.getOperand(0).getOperand(1),
1815                                    N0.getOperand(1)));
1816 
1817   if (N1.getOpcode() == ISD::AND) {
1818     SDValue AndOp0 = N1.getOperand(0);
1819     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1820     unsigned DestBits = VT.getScalarSizeInBits();
1821 
1822     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1823     // and similar xforms where the inner op is either ~0 or 0.
1824     if (NumSignBits == DestBits &&
1825         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
1826       return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
1827   }
1828 
1829   // add (sext i1), X -> sub X, (zext i1)
1830   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1831       N0.getOperand(0).getValueType() == MVT::i1 &&
1832       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1833     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1834     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1835   }
1836 
1837   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
1838   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1839     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1840     if (TN->getVT() == MVT::i1) {
1841       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1842                                  DAG.getConstant(1, DL, VT));
1843       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
1844     }
1845   }
1846 
1847   return SDValue();
1848 }
1849 
1850 SDValue DAGCombiner::visitADDC(SDNode *N) {
1851   SDValue N0 = N->getOperand(0);
1852   SDValue N1 = N->getOperand(1);
1853   EVT VT = N0.getValueType();
1854 
1855   // If the flag result is dead, turn this into an ADD.
1856   if (!N->hasAnyUseOfValue(1))
1857     return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
1858                      DAG.getNode(ISD::CARRY_FALSE,
1859                                  SDLoc(N), MVT::Glue));
1860 
1861   // canonicalize constant to RHS.
1862   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1863   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1864   if (N0C && !N1C)
1865     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
1866 
1867   // fold (addc x, 0) -> x + no carry out
1868   if (isNullConstant(N1))
1869     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1870                                         SDLoc(N), MVT::Glue));
1871 
1872   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1873   APInt LHSZero, LHSOne;
1874   APInt RHSZero, RHSOne;
1875   DAG.computeKnownBits(N0, LHSZero, LHSOne);
1876 
1877   if (LHSZero.getBoolValue()) {
1878     DAG.computeKnownBits(N1, RHSZero, RHSOne);
1879 
1880     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1881     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1882     if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1883       return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
1884                        DAG.getNode(ISD::CARRY_FALSE,
1885                                    SDLoc(N), MVT::Glue));
1886   }
1887 
1888   return SDValue();
1889 }
1890 
1891 SDValue DAGCombiner::visitADDE(SDNode *N) {
1892   SDValue N0 = N->getOperand(0);
1893   SDValue N1 = N->getOperand(1);
1894   SDValue CarryIn = N->getOperand(2);
1895 
1896   // canonicalize constant to RHS
1897   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1898   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1899   if (N0C && !N1C)
1900     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1901                        N1, N0, CarryIn);
1902 
1903   // fold (adde x, y, false) -> (addc x, y)
1904   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1905     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1906 
1907   return SDValue();
1908 }
1909 
1910 // Since it may not be valid to emit a fold to zero for vector initializers
1911 // check if we can before folding.
1912 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
1913                              SelectionDAG &DAG, bool LegalOperations,
1914                              bool LegalTypes) {
1915   if (!VT.isVector())
1916     return DAG.getConstant(0, DL, VT);
1917   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1918     return DAG.getConstant(0, DL, VT);
1919   return SDValue();
1920 }
1921 
1922 SDValue DAGCombiner::visitSUB(SDNode *N) {
1923   SDValue N0 = N->getOperand(0);
1924   SDValue N1 = N->getOperand(1);
1925   EVT VT = N0.getValueType();
1926   SDLoc DL(N);
1927 
1928   // fold vector ops
1929   if (VT.isVector()) {
1930     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1931       return FoldedVOp;
1932 
1933     // fold (sub x, 0) -> x, vector edition
1934     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1935       return N0;
1936   }
1937 
1938   // fold (sub x, x) -> 0
1939   // FIXME: Refactor this and xor and other similar operations together.
1940   if (N0 == N1)
1941     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
1942   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
1943       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1944     // fold (sub c1, c2) -> c1-c2
1945     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
1946                                       N1.getNode());
1947   }
1948 
1949   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
1950 
1951   // fold (sub x, c) -> (add x, -c)
1952   if (N1C) {
1953     return DAG.getNode(ISD::ADD, DL, VT, N0,
1954                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
1955   }
1956 
1957   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
1958   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
1959     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
1960 
1961   // fold A-(A-B) -> B
1962   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
1963     return N1.getOperand(1);
1964 
1965   // fold (A+B)-A -> B
1966   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
1967     return N0.getOperand(1);
1968 
1969   // fold (A+B)-B -> A
1970   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
1971     return N0.getOperand(0);
1972 
1973   // fold C2-(A+C1) -> (C2-C1)-A
1974   if (N1.getOpcode() == ISD::ADD) {
1975     SDValue N11 = N1.getOperand(1);
1976     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
1977         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
1978       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
1979       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
1980     }
1981   }
1982 
1983   // fold ((A+(B+or-C))-B) -> A+or-C
1984   if (N0.getOpcode() == ISD::ADD &&
1985       (N0.getOperand(1).getOpcode() == ISD::SUB ||
1986        N0.getOperand(1).getOpcode() == ISD::ADD) &&
1987       N0.getOperand(1).getOperand(0) == N1)
1988     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
1989                        N0.getOperand(1).getOperand(1));
1990 
1991   // fold ((A+(C+B))-B) -> A+C
1992   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
1993       N0.getOperand(1).getOperand(1) == N1)
1994     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
1995                        N0.getOperand(1).getOperand(0));
1996 
1997   // fold ((A-(B-C))-C) -> A-B
1998   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
1999       N0.getOperand(1).getOperand(1) == N1)
2000     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2001                        N0.getOperand(1).getOperand(0));
2002 
2003   // If either operand of a sub is undef, the result is undef
2004   if (N0.isUndef())
2005     return N0;
2006   if (N1.isUndef())
2007     return N1;
2008 
2009   // If the relocation model supports it, consider symbol offsets.
2010   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2011     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2012       // fold (sub Sym, c) -> Sym-c
2013       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2014         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2015                                     GA->getOffset() -
2016                                         (uint64_t)N1C->getSExtValue());
2017       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2018       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2019         if (GA->getGlobal() == GB->getGlobal())
2020           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2021                                  DL, VT);
2022     }
2023 
2024   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2025   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2026     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2027     if (TN->getVT() == MVT::i1) {
2028       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2029                                  DAG.getConstant(1, DL, VT));
2030       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2031     }
2032   }
2033 
2034   return SDValue();
2035 }
2036 
2037 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2038   SDValue N0 = N->getOperand(0);
2039   SDValue N1 = N->getOperand(1);
2040   EVT VT = N0.getValueType();
2041   SDLoc DL(N);
2042 
2043   // If the flag result is dead, turn this into an SUB.
2044   if (!N->hasAnyUseOfValue(1))
2045     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2046                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2047 
2048   // fold (subc x, x) -> 0 + no borrow
2049   if (N0 == N1)
2050     return CombineTo(N, DAG.getConstant(0, DL, VT),
2051                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2052 
2053   // fold (subc x, 0) -> x + no borrow
2054   if (isNullConstant(N1))
2055     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2056 
2057   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2058   if (isAllOnesConstant(N0))
2059     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2060                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2061 
2062   return SDValue();
2063 }
2064 
2065 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2066   SDValue N0 = N->getOperand(0);
2067   SDValue N1 = N->getOperand(1);
2068   SDValue CarryIn = N->getOperand(2);
2069 
2070   // fold (sube x, y, false) -> (subc x, y)
2071   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2072     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2073 
2074   return SDValue();
2075 }
2076 
2077 SDValue DAGCombiner::visitMUL(SDNode *N) {
2078   SDValue N0 = N->getOperand(0);
2079   SDValue N1 = N->getOperand(1);
2080   EVT VT = N0.getValueType();
2081 
2082   // fold (mul x, undef) -> 0
2083   if (N0.isUndef() || N1.isUndef())
2084     return DAG.getConstant(0, SDLoc(N), VT);
2085 
2086   bool N0IsConst = false;
2087   bool N1IsConst = false;
2088   bool N1IsOpaqueConst = false;
2089   bool N0IsOpaqueConst = false;
2090   APInt ConstValue0, ConstValue1;
2091   // fold vector ops
2092   if (VT.isVector()) {
2093     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2094       return FoldedVOp;
2095 
2096     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2097     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2098   } else {
2099     N0IsConst = isa<ConstantSDNode>(N0);
2100     if (N0IsConst) {
2101       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2102       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2103     }
2104     N1IsConst = isa<ConstantSDNode>(N1);
2105     if (N1IsConst) {
2106       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2107       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2108     }
2109   }
2110 
2111   // fold (mul c1, c2) -> c1*c2
2112   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2113     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2114                                       N0.getNode(), N1.getNode());
2115 
2116   // canonicalize constant to RHS (vector doesn't have to splat)
2117   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2118      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2119     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2120   // fold (mul x, 0) -> 0
2121   if (N1IsConst && ConstValue1 == 0)
2122     return N1;
2123   // We require a splat of the entire scalar bit width for non-contiguous
2124   // bit patterns.
2125   bool IsFullSplat =
2126     ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2127   // fold (mul x, 1) -> x
2128   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
2129     return N0;
2130   // fold (mul x, -1) -> 0-x
2131   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2132     SDLoc DL(N);
2133     return DAG.getNode(ISD::SUB, DL, VT,
2134                        DAG.getConstant(0, DL, VT), N0);
2135   }
2136   // fold (mul x, (1 << c)) -> x << c
2137   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2138       IsFullSplat) {
2139     SDLoc DL(N);
2140     return DAG.getNode(ISD::SHL, DL, VT, N0,
2141                        DAG.getConstant(ConstValue1.logBase2(), DL,
2142                                        getShiftAmountTy(N0.getValueType())));
2143   }
2144   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2145   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2146       IsFullSplat) {
2147     unsigned Log2Val = (-ConstValue1).logBase2();
2148     SDLoc DL(N);
2149     // FIXME: If the input is something that is easily negated (e.g. a
2150     // single-use add), we should put the negate there.
2151     return DAG.getNode(ISD::SUB, DL, VT,
2152                        DAG.getConstant(0, DL, VT),
2153                        DAG.getNode(ISD::SHL, DL, VT, N0,
2154                             DAG.getConstant(Log2Val, DL,
2155                                       getShiftAmountTy(N0.getValueType()))));
2156   }
2157 
2158   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2159   if (N0.getOpcode() == ISD::SHL &&
2160       isConstantOrConstantVector(N1) &&
2161       isConstantOrConstantVector(N0.getOperand(1))) {
2162     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2163     AddToWorklist(C3.getNode());
2164     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2165   }
2166 
2167   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2168   // use.
2169   {
2170     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2171 
2172     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2173     if (N0.getOpcode() == ISD::SHL &&
2174         isConstantOrConstantVector(N0.getOperand(1)) &&
2175         N0.getNode()->hasOneUse()) {
2176       Sh = N0; Y = N1;
2177     } else if (N1.getOpcode() == ISD::SHL &&
2178                isConstantOrConstantVector(N1.getOperand(1)) &&
2179                N1.getNode()->hasOneUse()) {
2180       Sh = N1; Y = N0;
2181     }
2182 
2183     if (Sh.getNode()) {
2184       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2185       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2186     }
2187   }
2188 
2189   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2190   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2191       N0.getOpcode() == ISD::ADD &&
2192       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2193       isMulAddWithConstProfitable(N, N0, N1))
2194       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2195                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2196                                      N0.getOperand(0), N1),
2197                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2198                                      N0.getOperand(1), N1));
2199 
2200   // reassociate mul
2201   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2202     return RMUL;
2203 
2204   return SDValue();
2205 }
2206 
2207 /// Return true if divmod libcall is available.
2208 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2209                                      const TargetLowering &TLI) {
2210   RTLIB::Libcall LC;
2211   EVT NodeType = Node->getValueType(0);
2212   if (!NodeType.isSimple())
2213     return false;
2214   switch (NodeType.getSimpleVT().SimpleTy) {
2215   default: return false; // No libcall for vector types.
2216   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2217   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2218   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2219   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2220   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2221   }
2222 
2223   return TLI.getLibcallName(LC) != nullptr;
2224 }
2225 
2226 /// Issue divrem if both quotient and remainder are needed.
2227 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2228   if (Node->use_empty())
2229     return SDValue(); // This is a dead node, leave it alone.
2230 
2231   unsigned Opcode = Node->getOpcode();
2232   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2233   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2234 
2235   // DivMod lib calls can still work on non-legal types if using lib-calls.
2236   EVT VT = Node->getValueType(0);
2237   if (VT.isVector() || !VT.isInteger())
2238     return SDValue();
2239 
2240   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2241     return SDValue();
2242 
2243   // If DIVREM is going to get expanded into a libcall,
2244   // but there is no libcall available, then don't combine.
2245   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2246       !isDivRemLibcallAvailable(Node, isSigned, TLI))
2247     return SDValue();
2248 
2249   // If div is legal, it's better to do the normal expansion
2250   unsigned OtherOpcode = 0;
2251   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2252     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2253     if (TLI.isOperationLegalOrCustom(Opcode, VT))
2254       return SDValue();
2255   } else {
2256     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2257     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2258       return SDValue();
2259   }
2260 
2261   SDValue Op0 = Node->getOperand(0);
2262   SDValue Op1 = Node->getOperand(1);
2263   SDValue combined;
2264   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2265          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
2266     SDNode *User = *UI;
2267     if (User == Node || User->use_empty())
2268       continue;
2269     // Convert the other matching node(s), too;
2270     // otherwise, the DIVREM may get target-legalized into something
2271     // target-specific that we won't be able to recognize.
2272     unsigned UserOpc = User->getOpcode();
2273     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2274         User->getOperand(0) == Op0 &&
2275         User->getOperand(1) == Op1) {
2276       if (!combined) {
2277         if (UserOpc == OtherOpcode) {
2278           SDVTList VTs = DAG.getVTList(VT, VT);
2279           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2280         } else if (UserOpc == DivRemOpc) {
2281           combined = SDValue(User, 0);
2282         } else {
2283           assert(UserOpc == Opcode);
2284           continue;
2285         }
2286       }
2287       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2288         CombineTo(User, combined);
2289       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2290         CombineTo(User, combined.getValue(1));
2291     }
2292   }
2293   return combined;
2294 }
2295 
2296 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2297   SDValue N0 = N->getOperand(0);
2298   SDValue N1 = N->getOperand(1);
2299   EVT VT = N->getValueType(0);
2300 
2301   // fold vector ops
2302   if (VT.isVector())
2303     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2304       return FoldedVOp;
2305 
2306   SDLoc DL(N);
2307 
2308   // fold (sdiv c1, c2) -> c1/c2
2309   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2310   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2311   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2312     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2313   // fold (sdiv X, 1) -> X
2314   if (N1C && N1C->isOne())
2315     return N0;
2316   // fold (sdiv X, -1) -> 0-X
2317   if (N1C && N1C->isAllOnesValue())
2318     return DAG.getNode(ISD::SUB, DL, VT,
2319                        DAG.getConstant(0, DL, VT), N0);
2320 
2321   // If we know the sign bits of both operands are zero, strength reduce to a
2322   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2323   if (!VT.isVector()) {
2324     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2325       return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2326   }
2327 
2328   // fold (sdiv X, pow2) -> simple ops after legalize
2329   // FIXME: We check for the exact bit here because the generic lowering gives
2330   // better results in that case. The target-specific lowering should learn how
2331   // to handle exact sdivs efficiently.
2332   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2333       !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
2334       (N1C->getAPIntValue().isPowerOf2() ||
2335        (-N1C->getAPIntValue()).isPowerOf2())) {
2336     // Target-specific implementation of sdiv x, pow2.
2337     if (SDValue Res = BuildSDIVPow2(N))
2338       return Res;
2339 
2340     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2341 
2342     // Splat the sign bit into the register
2343     SDValue SGN =
2344         DAG.getNode(ISD::SRA, DL, VT, N0,
2345                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2346                                     getShiftAmountTy(N0.getValueType())));
2347     AddToWorklist(SGN.getNode());
2348 
2349     // Add (N0 < 0) ? abs2 - 1 : 0;
2350     SDValue SRL =
2351         DAG.getNode(ISD::SRL, DL, VT, SGN,
2352                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2353                                     getShiftAmountTy(SGN.getValueType())));
2354     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2355     AddToWorklist(SRL.getNode());
2356     AddToWorklist(ADD.getNode());    // Divide by pow2
2357     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2358                   DAG.getConstant(lg2, DL,
2359                                   getShiftAmountTy(ADD.getValueType())));
2360 
2361     // If we're dividing by a positive value, we're done.  Otherwise, we must
2362     // negate the result.
2363     if (N1C->getAPIntValue().isNonNegative())
2364       return SRA;
2365 
2366     AddToWorklist(SRA.getNode());
2367     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2368   }
2369 
2370   // If integer divide is expensive and we satisfy the requirements, emit an
2371   // alternate sequence.  Targets may check function attributes for size/speed
2372   // trade-offs.
2373   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2374   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2375     if (SDValue Op = BuildSDIV(N))
2376       return Op;
2377 
2378   // sdiv, srem -> sdivrem
2379   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
2380   // Otherwise, we break the simplification logic in visitREM().
2381   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2382     if (SDValue DivRem = useDivRem(N))
2383         return DivRem;
2384 
2385   // undef / X -> 0
2386   if (N0.isUndef())
2387     return DAG.getConstant(0, DL, VT);
2388   // X / undef -> undef
2389   if (N1.isUndef())
2390     return N1;
2391 
2392   return SDValue();
2393 }
2394 
2395 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2396   SDValue N0 = N->getOperand(0);
2397   SDValue N1 = N->getOperand(1);
2398   EVT VT = N->getValueType(0);
2399 
2400   // fold vector ops
2401   if (VT.isVector())
2402     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2403       return FoldedVOp;
2404 
2405   SDLoc DL(N);
2406 
2407   // fold (udiv c1, c2) -> c1/c2
2408   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2409   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2410   if (N0C && N1C)
2411     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2412                                                     N0C, N1C))
2413       return Folded;
2414   // fold (udiv x, (1 << c)) -> x >>u c
2415   if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
2416     return DAG.getNode(ISD::SRL, DL, VT, N0,
2417                        DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
2418                                        getShiftAmountTy(N0.getValueType())));
2419 
2420   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2421   if (N1.getOpcode() == ISD::SHL) {
2422     if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
2423       if (SHC->getAPIntValue().isPowerOf2()) {
2424         EVT ADDVT = N1.getOperand(1).getValueType();
2425         SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
2426                                   N1.getOperand(1),
2427                                   DAG.getConstant(SHC->getAPIntValue()
2428                                                                   .logBase2(),
2429                                                   DL, ADDVT));
2430         AddToWorklist(Add.getNode());
2431         return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2432       }
2433     }
2434   }
2435 
2436   // fold (udiv x, c) -> alternate
2437   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2438   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2439     if (SDValue Op = BuildUDIV(N))
2440       return Op;
2441 
2442   // sdiv, srem -> sdivrem
2443   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
2444   // Otherwise, we break the simplification logic in visitREM().
2445   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2446     if (SDValue DivRem = useDivRem(N))
2447         return DivRem;
2448 
2449   // undef / X -> 0
2450   if (N0.isUndef())
2451     return DAG.getConstant(0, DL, VT);
2452   // X / undef -> undef
2453   if (N1.isUndef())
2454     return N1;
2455 
2456   return SDValue();
2457 }
2458 
2459 // handles ISD::SREM and ISD::UREM
2460 SDValue DAGCombiner::visitREM(SDNode *N) {
2461   unsigned Opcode = N->getOpcode();
2462   SDValue N0 = N->getOperand(0);
2463   SDValue N1 = N->getOperand(1);
2464   EVT VT = N->getValueType(0);
2465   bool isSigned = (Opcode == ISD::SREM);
2466   SDLoc DL(N);
2467 
2468   // fold (rem c1, c2) -> c1%c2
2469   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2470   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2471   if (N0C && N1C)
2472     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2473       return Folded;
2474 
2475   if (isSigned) {
2476     // If we know the sign bits of both operands are zero, strength reduce to a
2477     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2478     if (!VT.isVector()) {
2479       if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2480         return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2481     }
2482   } else {
2483     // fold (urem x, pow2) -> (and x, pow2-1)
2484     if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2485         N1C->getAPIntValue().isPowerOf2()) {
2486       return DAG.getNode(ISD::AND, DL, VT, N0,
2487                          DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
2488     }
2489     // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2490     if (N1.getOpcode() == ISD::SHL) {
2491       ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
2492       if (SHC && SHC->getAPIntValue().isPowerOf2()) {
2493         APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
2494         SDValue Add =
2495             DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
2496         AddToWorklist(Add.getNode());
2497         return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2498       }
2499     }
2500   }
2501 
2502   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2503 
2504   // If X/C can be simplified by the division-by-constant logic, lower
2505   // X%C to the equivalent of X-X/C*C.
2506   // To avoid mangling nodes, this simplification requires that the combine()
2507   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
2508   // against this by skipping the simplification if isIntDivCheap().  When
2509   // div is not cheap, combine will not return a DIVREM.  Regardless,
2510   // checking cheapness here makes sense since the simplification results in
2511   // fatter code.
2512   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
2513     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2514     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
2515     AddToWorklist(Div.getNode());
2516     SDValue OptimizedDiv = combine(Div.getNode());
2517     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2518       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
2519              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
2520       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
2521       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
2522       AddToWorklist(Mul.getNode());
2523       return Sub;
2524     }
2525   }
2526 
2527   // sdiv, srem -> sdivrem
2528   if (SDValue DivRem = useDivRem(N))
2529     return DivRem.getValue(1);
2530 
2531   // undef % X -> 0
2532   if (N0.isUndef())
2533     return DAG.getConstant(0, DL, VT);
2534   // X % undef -> undef
2535   if (N1.isUndef())
2536     return N1;
2537 
2538   return SDValue();
2539 }
2540 
2541 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2542   SDValue N0 = N->getOperand(0);
2543   SDValue N1 = N->getOperand(1);
2544   EVT VT = N->getValueType(0);
2545   SDLoc DL(N);
2546 
2547   // fold (mulhs x, 0) -> 0
2548   if (isNullConstant(N1))
2549     return N1;
2550   // fold (mulhs x, 1) -> (sra x, size(x)-1)
2551   if (isOneConstant(N1)) {
2552     SDLoc DL(N);
2553     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2554                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
2555                                        getShiftAmountTy(N0.getValueType())));
2556   }
2557   // fold (mulhs x, undef) -> 0
2558   if (N0.isUndef() || N1.isUndef())
2559     return DAG.getConstant(0, SDLoc(N), VT);
2560 
2561   // If the type twice as wide is legal, transform the mulhs to a wider multiply
2562   // plus a shift.
2563   if (VT.isSimple() && !VT.isVector()) {
2564     MVT Simple = VT.getSimpleVT();
2565     unsigned SimpleSize = Simple.getSizeInBits();
2566     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2567     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2568       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2569       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2570       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2571       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2572             DAG.getConstant(SimpleSize, DL,
2573                             getShiftAmountTy(N1.getValueType())));
2574       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2575     }
2576   }
2577 
2578   return SDValue();
2579 }
2580 
2581 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2582   SDValue N0 = N->getOperand(0);
2583   SDValue N1 = N->getOperand(1);
2584   EVT VT = N->getValueType(0);
2585   SDLoc DL(N);
2586 
2587   // fold (mulhu x, 0) -> 0
2588   if (isNullConstant(N1))
2589     return N1;
2590   // fold (mulhu x, 1) -> 0
2591   if (isOneConstant(N1))
2592     return DAG.getConstant(0, DL, N0.getValueType());
2593   // fold (mulhu x, undef) -> 0
2594   if (N0.isUndef() || N1.isUndef())
2595     return DAG.getConstant(0, DL, VT);
2596 
2597   // If the type twice as wide is legal, transform the mulhu to a wider multiply
2598   // plus a shift.
2599   if (VT.isSimple() && !VT.isVector()) {
2600     MVT Simple = VT.getSimpleVT();
2601     unsigned SimpleSize = Simple.getSizeInBits();
2602     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2603     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2604       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2605       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2606       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2607       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2608             DAG.getConstant(SimpleSize, DL,
2609                             getShiftAmountTy(N1.getValueType())));
2610       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2611     }
2612   }
2613 
2614   return SDValue();
2615 }
2616 
2617 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
2618 /// give the opcodes for the two computations that are being performed. Return
2619 /// true if a simplification was made.
2620 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2621                                                 unsigned HiOp) {
2622   // If the high half is not needed, just compute the low half.
2623   bool HiExists = N->hasAnyUseOfValue(1);
2624   if (!HiExists &&
2625       (!LegalOperations ||
2626        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
2627     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2628     return CombineTo(N, Res, Res);
2629   }
2630 
2631   // If the low half is not needed, just compute the high half.
2632   bool LoExists = N->hasAnyUseOfValue(0);
2633   if (!LoExists &&
2634       (!LegalOperations ||
2635        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2636     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2637     return CombineTo(N, Res, Res);
2638   }
2639 
2640   // If both halves are used, return as it is.
2641   if (LoExists && HiExists)
2642     return SDValue();
2643 
2644   // If the two computed results can be simplified separately, separate them.
2645   if (LoExists) {
2646     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2647     AddToWorklist(Lo.getNode());
2648     SDValue LoOpt = combine(Lo.getNode());
2649     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2650         (!LegalOperations ||
2651          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2652       return CombineTo(N, LoOpt, LoOpt);
2653   }
2654 
2655   if (HiExists) {
2656     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2657     AddToWorklist(Hi.getNode());
2658     SDValue HiOpt = combine(Hi.getNode());
2659     if (HiOpt.getNode() && HiOpt != Hi &&
2660         (!LegalOperations ||
2661          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2662       return CombineTo(N, HiOpt, HiOpt);
2663   }
2664 
2665   return SDValue();
2666 }
2667 
2668 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2669   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
2670     return Res;
2671 
2672   EVT VT = N->getValueType(0);
2673   SDLoc DL(N);
2674 
2675   // If the type is twice as wide is legal, transform the mulhu to a wider
2676   // multiply plus a shift.
2677   if (VT.isSimple() && !VT.isVector()) {
2678     MVT Simple = VT.getSimpleVT();
2679     unsigned SimpleSize = Simple.getSizeInBits();
2680     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2681     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2682       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2683       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2684       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2685       // Compute the high part as N1.
2686       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2687             DAG.getConstant(SimpleSize, DL,
2688                             getShiftAmountTy(Lo.getValueType())));
2689       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2690       // Compute the low part as N0.
2691       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2692       return CombineTo(N, Lo, Hi);
2693     }
2694   }
2695 
2696   return SDValue();
2697 }
2698 
2699 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2700   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
2701     return Res;
2702 
2703   EVT VT = N->getValueType(0);
2704   SDLoc DL(N);
2705 
2706   // If the type is twice as wide is legal, transform the mulhu to a wider
2707   // multiply plus a shift.
2708   if (VT.isSimple() && !VT.isVector()) {
2709     MVT Simple = VT.getSimpleVT();
2710     unsigned SimpleSize = Simple.getSizeInBits();
2711     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2712     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2713       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2714       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2715       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2716       // Compute the high part as N1.
2717       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2718             DAG.getConstant(SimpleSize, DL,
2719                             getShiftAmountTy(Lo.getValueType())));
2720       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2721       // Compute the low part as N0.
2722       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2723       return CombineTo(N, Lo, Hi);
2724     }
2725   }
2726 
2727   return SDValue();
2728 }
2729 
2730 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2731   // (smulo x, 2) -> (saddo x, x)
2732   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2733     if (C2->getAPIntValue() == 2)
2734       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2735                          N->getOperand(0), N->getOperand(0));
2736 
2737   return SDValue();
2738 }
2739 
2740 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2741   // (umulo x, 2) -> (uaddo x, x)
2742   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2743     if (C2->getAPIntValue() == 2)
2744       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2745                          N->getOperand(0), N->getOperand(0));
2746 
2747   return SDValue();
2748 }
2749 
2750 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
2751   SDValue N0 = N->getOperand(0);
2752   SDValue N1 = N->getOperand(1);
2753   EVT VT = N0.getValueType();
2754 
2755   // fold vector ops
2756   if (VT.isVector())
2757     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2758       return FoldedVOp;
2759 
2760   // fold (add c1, c2) -> c1+c2
2761   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
2762   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2763   if (N0C && N1C)
2764     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
2765 
2766   // canonicalize constant to RHS
2767   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2768      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2769     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
2770 
2771   return SDValue();
2772 }
2773 
2774 /// If this is a binary operator with two operands of the same opcode, try to
2775 /// simplify it.
2776 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
2777   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2778   EVT VT = N0.getValueType();
2779   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
2780 
2781   // Bail early if none of these transforms apply.
2782   if (N0.getNode()->getNumOperands() == 0) return SDValue();
2783 
2784   // For each of OP in AND/OR/XOR:
2785   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
2786   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
2787   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
2788   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
2789   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
2790   //
2791   // do not sink logical op inside of a vector extend, since it may combine
2792   // into a vsetcc.
2793   EVT Op0VT = N0.getOperand(0).getValueType();
2794   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
2795        N0.getOpcode() == ISD::SIGN_EXTEND ||
2796        N0.getOpcode() == ISD::BSWAP ||
2797        // Avoid infinite looping with PromoteIntBinOp.
2798        (N0.getOpcode() == ISD::ANY_EXTEND &&
2799         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
2800        (N0.getOpcode() == ISD::TRUNCATE &&
2801         (!TLI.isZExtFree(VT, Op0VT) ||
2802          !TLI.isTruncateFree(Op0VT, VT)) &&
2803         TLI.isTypeLegal(Op0VT))) &&
2804       !VT.isVector() &&
2805       Op0VT == N1.getOperand(0).getValueType() &&
2806       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
2807     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2808                                  N0.getOperand(0).getValueType(),
2809                                  N0.getOperand(0), N1.getOperand(0));
2810     AddToWorklist(ORNode.getNode());
2811     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
2812   }
2813 
2814   // For each of OP in SHL/SRL/SRA/AND...
2815   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
2816   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
2817   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
2818   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
2819        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
2820       N0.getOperand(1) == N1.getOperand(1)) {
2821     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2822                                  N0.getOperand(0).getValueType(),
2823                                  N0.getOperand(0), N1.getOperand(0));
2824     AddToWorklist(ORNode.getNode());
2825     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
2826                        ORNode, N0.getOperand(1));
2827   }
2828 
2829   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
2830   // Only perform this optimization up until type legalization, before
2831   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
2832   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
2833   // we don't want to undo this promotion.
2834   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
2835   // on scalars.
2836   if ((N0.getOpcode() == ISD::BITCAST ||
2837        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
2838        Level <= AfterLegalizeTypes) {
2839     SDValue In0 = N0.getOperand(0);
2840     SDValue In1 = N1.getOperand(0);
2841     EVT In0Ty = In0.getValueType();
2842     EVT In1Ty = In1.getValueType();
2843     SDLoc DL(N);
2844     // If both incoming values are integers, and the original types are the
2845     // same.
2846     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
2847       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
2848       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
2849       AddToWorklist(Op.getNode());
2850       return BC;
2851     }
2852   }
2853 
2854   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
2855   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
2856   // If both shuffles use the same mask, and both shuffle within a single
2857   // vector, then it is worthwhile to move the swizzle after the operation.
2858   // The type-legalizer generates this pattern when loading illegal
2859   // vector types from memory. In many cases this allows additional shuffle
2860   // optimizations.
2861   // There are other cases where moving the shuffle after the xor/and/or
2862   // is profitable even if shuffles don't perform a swizzle.
2863   // If both shuffles use the same mask, and both shuffles have the same first
2864   // or second operand, then it might still be profitable to move the shuffle
2865   // after the xor/and/or operation.
2866   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
2867     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
2868     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
2869 
2870     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
2871            "Inputs to shuffles are not the same type");
2872 
2873     // Check that both shuffles use the same mask. The masks are known to be of
2874     // the same length because the result vector type is the same.
2875     // Check also that shuffles have only one use to avoid introducing extra
2876     // instructions.
2877     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
2878         SVN0->getMask().equals(SVN1->getMask())) {
2879       SDValue ShOp = N0->getOperand(1);
2880 
2881       // Don't try to fold this node if it requires introducing a
2882       // build vector of all zeros that might be illegal at this stage.
2883       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
2884         if (!LegalTypes)
2885           ShOp = DAG.getConstant(0, SDLoc(N), VT);
2886         else
2887           ShOp = SDValue();
2888       }
2889 
2890       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
2891       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
2892       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
2893       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
2894         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
2895                                       N0->getOperand(0), N1->getOperand(0));
2896         AddToWorklist(NewNode.getNode());
2897         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
2898                                     SVN0->getMask());
2899       }
2900 
2901       // Don't try to fold this node if it requires introducing a
2902       // build vector of all zeros that might be illegal at this stage.
2903       ShOp = N0->getOperand(0);
2904       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
2905         if (!LegalTypes)
2906           ShOp = DAG.getConstant(0, SDLoc(N), VT);
2907         else
2908           ShOp = SDValue();
2909       }
2910 
2911       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
2912       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
2913       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
2914       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
2915         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
2916                                       N0->getOperand(1), N1->getOperand(1));
2917         AddToWorklist(NewNode.getNode());
2918         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
2919                                     SVN0->getMask());
2920       }
2921     }
2922   }
2923 
2924   return SDValue();
2925 }
2926 
2927 /// This contains all DAGCombine rules which reduce two values combined by
2928 /// an And operation to a single value. This makes them reusable in the context
2929 /// of visitSELECT(). Rules involving constants are not included as
2930 /// visitSELECT() already handles those cases.
2931 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
2932                                   SDNode *LocReference) {
2933   EVT VT = N1.getValueType();
2934 
2935   // fold (and x, undef) -> 0
2936   if (N0.isUndef() || N1.isUndef())
2937     return DAG.getConstant(0, SDLoc(LocReference), VT);
2938   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
2939   SDValue LL, LR, RL, RR, CC0, CC1;
2940   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2941     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2942     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2943 
2944     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2945         LL.getValueType().isInteger()) {
2946       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
2947       if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
2948         EVT CCVT = getSetCCResultType(LR.getValueType());
2949         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2950           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2951                                        LR.getValueType(), LL, RL);
2952           AddToWorklist(ORNode.getNode());
2953           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2954         }
2955       }
2956       if (isAllOnesConstant(LR)) {
2957         // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
2958         if (Op1 == ISD::SETEQ) {
2959           EVT CCVT = getSetCCResultType(LR.getValueType());
2960           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2961             SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
2962                                           LR.getValueType(), LL, RL);
2963             AddToWorklist(ANDNode.getNode());
2964             return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
2965           }
2966         }
2967         // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
2968         if (Op1 == ISD::SETGT) {
2969           EVT CCVT = getSetCCResultType(LR.getValueType());
2970           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2971             SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2972                                          LR.getValueType(), LL, RL);
2973             AddToWorklist(ORNode.getNode());
2974             return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2975           }
2976         }
2977       }
2978     }
2979     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
2980     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
2981         Op0 == Op1 && LL.getValueType().isInteger() &&
2982       Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
2983                             (isAllOnesConstant(LR) && isNullConstant(RR)))) {
2984       EVT CCVT = getSetCCResultType(LL.getValueType());
2985       if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2986         SDLoc DL(N0);
2987         SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
2988                                       LL, DAG.getConstant(1, DL,
2989                                                           LL.getValueType()));
2990         AddToWorklist(ADDNode.getNode());
2991         return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
2992                             DAG.getConstant(2, DL, LL.getValueType()),
2993                             ISD::SETUGE);
2994       }
2995     }
2996     // canonicalize equivalent to ll == rl
2997     if (LL == RR && LR == RL) {
2998       Op1 = ISD::getSetCCSwappedOperands(Op1);
2999       std::swap(RL, RR);
3000     }
3001     if (LL == RL && LR == RR) {
3002       bool isInteger = LL.getValueType().isInteger();
3003       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
3004       if (Result != ISD::SETCC_INVALID &&
3005           (!LegalOperations ||
3006            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3007             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
3008         EVT CCVT = getSetCCResultType(LL.getValueType());
3009         if (N0.getValueType() == CCVT ||
3010             (!LegalOperations && N0.getValueType() == MVT::i1))
3011           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3012                               LL, LR, Result);
3013       }
3014     }
3015   }
3016 
3017   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3018       VT.getSizeInBits() <= 64) {
3019     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3020       APInt ADDC = ADDI->getAPIntValue();
3021       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3022         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3023         // immediate for an add, but it is legal if its top c2 bits are set,
3024         // transform the ADD so the immediate doesn't need to be materialized
3025         // in a register.
3026         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3027           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3028                                              SRLI->getZExtValue());
3029           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3030             ADDC |= Mask;
3031             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3032               SDLoc DL(N0);
3033               SDValue NewAdd =
3034                 DAG.getNode(ISD::ADD, DL, VT,
3035                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3036               CombineTo(N0.getNode(), NewAdd);
3037               // Return N so it doesn't get rechecked!
3038               return SDValue(LocReference, 0);
3039             }
3040           }
3041         }
3042       }
3043     }
3044   }
3045 
3046   // Reduce bit extract of low half of an integer to the narrower type.
3047   // (and (srl i64:x, K), KMask) ->
3048   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3049   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3050     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3051       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3052         unsigned Size = VT.getSizeInBits();
3053         const APInt &AndMask = CAnd->getAPIntValue();
3054         unsigned ShiftBits = CShift->getZExtValue();
3055         unsigned MaskBits = AndMask.countTrailingOnes();
3056         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3057 
3058         if (APIntOps::isMask(AndMask) &&
3059             // Required bits must not span the two halves of the integer and
3060             // must fit in the half size type.
3061             (ShiftBits + MaskBits <= Size / 2) &&
3062             TLI.isNarrowingProfitable(VT, HalfVT) &&
3063             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3064             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3065             TLI.isTruncateFree(VT, HalfVT) &&
3066             TLI.isZExtFree(HalfVT, VT)) {
3067           // The isNarrowingProfitable is to avoid regressions on PPC and
3068           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3069           // on downstream users of this. Those patterns could probably be
3070           // extended to handle extensions mixed in.
3071 
3072           SDValue SL(N0);
3073           assert(ShiftBits != 0 && MaskBits <= Size);
3074 
3075           // Extracting the highest bit of the low half.
3076           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3077           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3078                                       N0.getOperand(0));
3079 
3080           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3081           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3082           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3083           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3084           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3085         }
3086       }
3087     }
3088   }
3089 
3090   return SDValue();
3091 }
3092 
3093 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3094                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3095                                    bool &NarrowLoad) {
3096   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3097 
3098   if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
3099     return false;
3100 
3101   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3102   LoadedVT = LoadN->getMemoryVT();
3103 
3104   if (ExtVT == LoadedVT &&
3105       (!LegalOperations ||
3106        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3107     // ZEXTLOAD will match without needing to change the size of the value being
3108     // loaded.
3109     NarrowLoad = false;
3110     return true;
3111   }
3112 
3113   // Do not change the width of a volatile load.
3114   if (LoadN->isVolatile())
3115     return false;
3116 
3117   // Do not generate loads of non-round integer types since these can
3118   // be expensive (and would be wrong if the type is not byte sized).
3119   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3120     return false;
3121 
3122   if (LegalOperations &&
3123       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3124     return false;
3125 
3126   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3127     return false;
3128 
3129   NarrowLoad = true;
3130   return true;
3131 }
3132 
3133 SDValue DAGCombiner::visitAND(SDNode *N) {
3134   SDValue N0 = N->getOperand(0);
3135   SDValue N1 = N->getOperand(1);
3136   EVT VT = N1.getValueType();
3137 
3138   // fold vector ops
3139   if (VT.isVector()) {
3140     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3141       return FoldedVOp;
3142 
3143     // fold (and x, 0) -> 0, vector edition
3144     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3145       // do not return N0, because undef node may exist in N0
3146       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3147                              SDLoc(N), N0.getValueType());
3148     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3149       // do not return N1, because undef node may exist in N1
3150       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3151                              SDLoc(N), N1.getValueType());
3152 
3153     // fold (and x, -1) -> x, vector edition
3154     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3155       return N1;
3156     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3157       return N0;
3158   }
3159 
3160   // fold (and c1, c2) -> c1&c2
3161   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3162   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3163   if (N0C && N1C && !N1C->isOpaque())
3164     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3165   // canonicalize constant to RHS
3166   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3167      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3168     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3169   // fold (and x, -1) -> x
3170   if (isAllOnesConstant(N1))
3171     return N0;
3172   // if (and x, c) is known to be zero, return 0
3173   unsigned BitWidth = VT.getScalarSizeInBits();
3174   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3175                                    APInt::getAllOnesValue(BitWidth)))
3176     return DAG.getConstant(0, SDLoc(N), VT);
3177   // reassociate and
3178   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3179     return RAND;
3180   // fold (and (or x, C), D) -> D if (C & D) == D
3181   if (N1C && N0.getOpcode() == ISD::OR)
3182     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3183       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
3184         return N1;
3185   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3186   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3187     SDValue N0Op0 = N0.getOperand(0);
3188     APInt Mask = ~N1C->getAPIntValue();
3189     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3190     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3191       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3192                                  N0.getValueType(), N0Op0);
3193 
3194       // Replace uses of the AND with uses of the Zero extend node.
3195       CombineTo(N, Zext);
3196 
3197       // We actually want to replace all uses of the any_extend with the
3198       // zero_extend, to avoid duplicating things.  This will later cause this
3199       // AND to be folded.
3200       CombineTo(N0.getNode(), Zext);
3201       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3202     }
3203   }
3204   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3205   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3206   // already be zero by virtue of the width of the base type of the load.
3207   //
3208   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3209   // more cases.
3210   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3211        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3212        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3213        N0.getOperand(0).getResNo() == 0) ||
3214       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3215     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3216                                          N0 : N0.getOperand(0) );
3217 
3218     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3219     // This can be a pure constant or a vector splat, in which case we treat the
3220     // vector as a scalar and use the splat value.
3221     APInt Constant = APInt::getNullValue(1);
3222     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3223       Constant = C->getAPIntValue();
3224     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3225       APInt SplatValue, SplatUndef;
3226       unsigned SplatBitSize;
3227       bool HasAnyUndefs;
3228       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3229                                              SplatBitSize, HasAnyUndefs);
3230       if (IsSplat) {
3231         // Undef bits can contribute to a possible optimisation if set, so
3232         // set them.
3233         SplatValue |= SplatUndef;
3234 
3235         // The splat value may be something like "0x00FFFFFF", which means 0 for
3236         // the first vector value and FF for the rest, repeating. We need a mask
3237         // that will apply equally to all members of the vector, so AND all the
3238         // lanes of the constant together.
3239         EVT VT = Vector->getValueType(0);
3240         unsigned BitWidth = VT.getScalarSizeInBits();
3241 
3242         // If the splat value has been compressed to a bitlength lower
3243         // than the size of the vector lane, we need to re-expand it to
3244         // the lane size.
3245         if (BitWidth > SplatBitSize)
3246           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3247                SplatBitSize < BitWidth;
3248                SplatBitSize = SplatBitSize * 2)
3249             SplatValue |= SplatValue.shl(SplatBitSize);
3250 
3251         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3252         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3253         if (SplatBitSize % BitWidth == 0) {
3254           Constant = APInt::getAllOnesValue(BitWidth);
3255           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3256             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3257         }
3258       }
3259     }
3260 
3261     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3262     // actually legal and isn't going to get expanded, else this is a false
3263     // optimisation.
3264     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3265                                                     Load->getValueType(0),
3266                                                     Load->getMemoryVT());
3267 
3268     // Resize the constant to the same size as the original memory access before
3269     // extension. If it is still the AllOnesValue then this AND is completely
3270     // unneeded.
3271     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3272 
3273     bool B;
3274     switch (Load->getExtensionType()) {
3275     default: B = false; break;
3276     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3277     case ISD::ZEXTLOAD:
3278     case ISD::NON_EXTLOAD: B = true; break;
3279     }
3280 
3281     if (B && Constant.isAllOnesValue()) {
3282       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3283       // preserve semantics once we get rid of the AND.
3284       SDValue NewLoad(Load, 0);
3285       if (Load->getExtensionType() == ISD::EXTLOAD) {
3286         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3287                               Load->getValueType(0), SDLoc(Load),
3288                               Load->getChain(), Load->getBasePtr(),
3289                               Load->getOffset(), Load->getMemoryVT(),
3290                               Load->getMemOperand());
3291         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3292         if (Load->getNumValues() == 3) {
3293           // PRE/POST_INC loads have 3 values.
3294           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3295                            NewLoad.getValue(2) };
3296           CombineTo(Load, To, 3, true);
3297         } else {
3298           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3299         }
3300       }
3301 
3302       // Fold the AND away, taking care not to fold to the old load node if we
3303       // replaced it.
3304       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3305 
3306       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3307     }
3308   }
3309 
3310   // fold (and (load x), 255) -> (zextload x, i8)
3311   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3312   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3313   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3314                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3315                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3316     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3317     LoadSDNode *LN0 = HasAnyExt
3318       ? cast<LoadSDNode>(N0.getOperand(0))
3319       : cast<LoadSDNode>(N0);
3320     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3321         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3322       auto NarrowLoad = false;
3323       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3324       EVT ExtVT, LoadedVT;
3325       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3326                            NarrowLoad)) {
3327         if (!NarrowLoad) {
3328           SDValue NewLoad =
3329             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3330                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3331                            LN0->getMemOperand());
3332           AddToWorklist(N);
3333           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3334           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3335         } else {
3336           EVT PtrType = LN0->getOperand(1).getValueType();
3337 
3338           unsigned Alignment = LN0->getAlignment();
3339           SDValue NewPtr = LN0->getBasePtr();
3340 
3341           // For big endian targets, we need to add an offset to the pointer
3342           // to load the correct bytes.  For little endian systems, we merely
3343           // need to read fewer bytes from the same pointer.
3344           if (DAG.getDataLayout().isBigEndian()) {
3345             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3346             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3347             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3348             SDLoc DL(LN0);
3349             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3350                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3351             Alignment = MinAlign(Alignment, PtrOff);
3352           }
3353 
3354           AddToWorklist(NewPtr.getNode());
3355 
3356           SDValue Load = DAG.getExtLoad(
3357               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3358               LN0->getPointerInfo(), ExtVT, Alignment,
3359               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3360           AddToWorklist(N);
3361           CombineTo(LN0, Load, Load.getValue(1));
3362           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3363         }
3364       }
3365     }
3366   }
3367 
3368   if (SDValue Combined = visitANDLike(N0, N1, N))
3369     return Combined;
3370 
3371   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3372   if (N0.getOpcode() == N1.getOpcode())
3373     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3374       return Tmp;
3375 
3376   // Masking the negated extension of a boolean is just the zero-extended
3377   // boolean:
3378   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3379   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3380   //
3381   // Note: the SimplifyDemandedBits fold below can make an information-losing
3382   // transform, and then we have no way to find this better fold.
3383   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3384     ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3385     SDValue SubRHS = N0.getOperand(1);
3386     if (SubLHS && SubLHS->isNullValue()) {
3387       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3388           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3389         return SubRHS;
3390       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3391           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3392         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3393     }
3394   }
3395 
3396   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3397   // fold (and (sra)) -> (and (srl)) when possible.
3398   if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
3399     return SDValue(N, 0);
3400 
3401   // fold (zext_inreg (extload x)) -> (zextload x)
3402   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3403     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3404     EVT MemVT = LN0->getMemoryVT();
3405     // If we zero all the possible extended bits, then we can turn this into
3406     // a zextload if we are running before legalize or the operation is legal.
3407     unsigned BitWidth = N1.getScalarValueSizeInBits();
3408     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3409                            BitWidth - MemVT.getScalarSizeInBits())) &&
3410         ((!LegalOperations && !LN0->isVolatile()) ||
3411          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3412       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3413                                        LN0->getChain(), LN0->getBasePtr(),
3414                                        MemVT, LN0->getMemOperand());
3415       AddToWorklist(N);
3416       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3417       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3418     }
3419   }
3420   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3421   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3422       N0.hasOneUse()) {
3423     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3424     EVT MemVT = LN0->getMemoryVT();
3425     // If we zero all the possible extended bits, then we can turn this into
3426     // a zextload if we are running before legalize or the operation is legal.
3427     unsigned BitWidth = N1.getScalarValueSizeInBits();
3428     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3429                            BitWidth - MemVT.getScalarSizeInBits())) &&
3430         ((!LegalOperations && !LN0->isVolatile()) ||
3431          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3432       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3433                                        LN0->getChain(), LN0->getBasePtr(),
3434                                        MemVT, LN0->getMemOperand());
3435       AddToWorklist(N);
3436       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3437       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3438     }
3439   }
3440   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3441   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3442     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3443                                            N0.getOperand(1), false))
3444       return BSwap;
3445   }
3446 
3447   return SDValue();
3448 }
3449 
3450 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3451 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3452                                         bool DemandHighBits) {
3453   if (!LegalOperations)
3454     return SDValue();
3455 
3456   EVT VT = N->getValueType(0);
3457   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3458     return SDValue();
3459   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3460     return SDValue();
3461 
3462   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3463   bool LookPassAnd0 = false;
3464   bool LookPassAnd1 = false;
3465   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3466       std::swap(N0, N1);
3467   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3468       std::swap(N0, N1);
3469   if (N0.getOpcode() == ISD::AND) {
3470     if (!N0.getNode()->hasOneUse())
3471       return SDValue();
3472     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3473     if (!N01C || N01C->getZExtValue() != 0xFF00)
3474       return SDValue();
3475     N0 = N0.getOperand(0);
3476     LookPassAnd0 = true;
3477   }
3478 
3479   if (N1.getOpcode() == ISD::AND) {
3480     if (!N1.getNode()->hasOneUse())
3481       return SDValue();
3482     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3483     if (!N11C || N11C->getZExtValue() != 0xFF)
3484       return SDValue();
3485     N1 = N1.getOperand(0);
3486     LookPassAnd1 = true;
3487   }
3488 
3489   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3490     std::swap(N0, N1);
3491   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
3492     return SDValue();
3493   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
3494     return SDValue();
3495 
3496   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3497   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3498   if (!N01C || !N11C)
3499     return SDValue();
3500   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
3501     return SDValue();
3502 
3503   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
3504   SDValue N00 = N0->getOperand(0);
3505   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
3506     if (!N00.getNode()->hasOneUse())
3507       return SDValue();
3508     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
3509     if (!N001C || N001C->getZExtValue() != 0xFF)
3510       return SDValue();
3511     N00 = N00.getOperand(0);
3512     LookPassAnd0 = true;
3513   }
3514 
3515   SDValue N10 = N1->getOperand(0);
3516   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
3517     if (!N10.getNode()->hasOneUse())
3518       return SDValue();
3519     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
3520     if (!N101C || N101C->getZExtValue() != 0xFF00)
3521       return SDValue();
3522     N10 = N10.getOperand(0);
3523     LookPassAnd1 = true;
3524   }
3525 
3526   if (N00 != N10)
3527     return SDValue();
3528 
3529   // Make sure everything beyond the low halfword gets set to zero since the SRL
3530   // 16 will clear the top bits.
3531   unsigned OpSizeInBits = VT.getSizeInBits();
3532   if (DemandHighBits && OpSizeInBits > 16) {
3533     // If the left-shift isn't masked out then the only way this is a bswap is
3534     // if all bits beyond the low 8 are 0. In that case the entire pattern
3535     // reduces to a left shift anyway: leave it for other parts of the combiner.
3536     if (!LookPassAnd0)
3537       return SDValue();
3538 
3539     // However, if the right shift isn't masked out then it might be because
3540     // it's not needed. See if we can spot that too.
3541     if (!LookPassAnd1 &&
3542         !DAG.MaskedValueIsZero(
3543             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
3544       return SDValue();
3545   }
3546 
3547   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
3548   if (OpSizeInBits > 16) {
3549     SDLoc DL(N);
3550     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
3551                       DAG.getConstant(OpSizeInBits - 16, DL,
3552                                       getShiftAmountTy(VT)));
3553   }
3554   return Res;
3555 }
3556 
3557 /// Return true if the specified node is an element that makes up a 32-bit
3558 /// packed halfword byteswap.
3559 /// ((x & 0x000000ff) << 8) |
3560 /// ((x & 0x0000ff00) >> 8) |
3561 /// ((x & 0x00ff0000) << 8) |
3562 /// ((x & 0xff000000) >> 8)
3563 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3564   if (!N.getNode()->hasOneUse())
3565     return false;
3566 
3567   unsigned Opc = N.getOpcode();
3568   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3569     return false;
3570 
3571   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3572   if (!N1C)
3573     return false;
3574 
3575   unsigned Num;
3576   switch (N1C->getZExtValue()) {
3577   default:
3578     return false;
3579   case 0xFF:       Num = 0; break;
3580   case 0xFF00:     Num = 1; break;
3581   case 0xFF0000:   Num = 2; break;
3582   case 0xFF000000: Num = 3; break;
3583   }
3584 
3585   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3586   SDValue N0 = N.getOperand(0);
3587   if (Opc == ISD::AND) {
3588     if (Num == 0 || Num == 2) {
3589       // (x >> 8) & 0xff
3590       // (x >> 8) & 0xff0000
3591       if (N0.getOpcode() != ISD::SRL)
3592         return false;
3593       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3594       if (!C || C->getZExtValue() != 8)
3595         return false;
3596     } else {
3597       // (x << 8) & 0xff00
3598       // (x << 8) & 0xff000000
3599       if (N0.getOpcode() != ISD::SHL)
3600         return false;
3601       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3602       if (!C || C->getZExtValue() != 8)
3603         return false;
3604     }
3605   } else if (Opc == ISD::SHL) {
3606     // (x & 0xff) << 8
3607     // (x & 0xff0000) << 8
3608     if (Num != 0 && Num != 2)
3609       return false;
3610     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3611     if (!C || C->getZExtValue() != 8)
3612       return false;
3613   } else { // Opc == ISD::SRL
3614     // (x & 0xff00) >> 8
3615     // (x & 0xff000000) >> 8
3616     if (Num != 1 && Num != 3)
3617       return false;
3618     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3619     if (!C || C->getZExtValue() != 8)
3620       return false;
3621   }
3622 
3623   if (Parts[Num])
3624     return false;
3625 
3626   Parts[Num] = N0.getOperand(0).getNode();
3627   return true;
3628 }
3629 
3630 /// Match a 32-bit packed halfword bswap. That is
3631 /// ((x & 0x000000ff) << 8) |
3632 /// ((x & 0x0000ff00) >> 8) |
3633 /// ((x & 0x00ff0000) << 8) |
3634 /// ((x & 0xff000000) >> 8)
3635 /// => (rotl (bswap x), 16)
3636 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3637   if (!LegalOperations)
3638     return SDValue();
3639 
3640   EVT VT = N->getValueType(0);
3641   if (VT != MVT::i32)
3642     return SDValue();
3643   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3644     return SDValue();
3645 
3646   // Look for either
3647   // (or (or (and), (and)), (or (and), (and)))
3648   // (or (or (or (and), (and)), (and)), (and))
3649   if (N0.getOpcode() != ISD::OR)
3650     return SDValue();
3651   SDValue N00 = N0.getOperand(0);
3652   SDValue N01 = N0.getOperand(1);
3653   SDNode *Parts[4] = {};
3654 
3655   if (N1.getOpcode() == ISD::OR &&
3656       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3657     // (or (or (and), (and)), (or (and), (and)))
3658     SDValue N000 = N00.getOperand(0);
3659     if (!isBSwapHWordElement(N000, Parts))
3660       return SDValue();
3661 
3662     SDValue N001 = N00.getOperand(1);
3663     if (!isBSwapHWordElement(N001, Parts))
3664       return SDValue();
3665     SDValue N010 = N01.getOperand(0);
3666     if (!isBSwapHWordElement(N010, Parts))
3667       return SDValue();
3668     SDValue N011 = N01.getOperand(1);
3669     if (!isBSwapHWordElement(N011, Parts))
3670       return SDValue();
3671   } else {
3672     // (or (or (or (and), (and)), (and)), (and))
3673     if (!isBSwapHWordElement(N1, Parts))
3674       return SDValue();
3675     if (!isBSwapHWordElement(N01, Parts))
3676       return SDValue();
3677     if (N00.getOpcode() != ISD::OR)
3678       return SDValue();
3679     SDValue N000 = N00.getOperand(0);
3680     if (!isBSwapHWordElement(N000, Parts))
3681       return SDValue();
3682     SDValue N001 = N00.getOperand(1);
3683     if (!isBSwapHWordElement(N001, Parts))
3684       return SDValue();
3685   }
3686 
3687   // Make sure the parts are all coming from the same node.
3688   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3689     return SDValue();
3690 
3691   SDLoc DL(N);
3692   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
3693                               SDValue(Parts[0], 0));
3694 
3695   // Result of the bswap should be rotated by 16. If it's not legal, then
3696   // do  (x << 16) | (x >> 16).
3697   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
3698   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3699     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
3700   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3701     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
3702   return DAG.getNode(ISD::OR, DL, VT,
3703                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
3704                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
3705 }
3706 
3707 /// This contains all DAGCombine rules which reduce two values combined by
3708 /// an Or operation to a single value \see visitANDLike().
3709 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
3710   EVT VT = N1.getValueType();
3711   // fold (or x, undef) -> -1
3712   if (!LegalOperations &&
3713       (N0.isUndef() || N1.isUndef())) {
3714     EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
3715     return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
3716                            SDLoc(LocReference), VT);
3717   }
3718   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3719   SDValue LL, LR, RL, RR, CC0, CC1;
3720   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3721     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3722     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3723 
3724     if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
3725       // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3726       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
3727       if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3728         EVT CCVT = getSetCCResultType(LR.getValueType());
3729         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3730           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3731                                        LR.getValueType(), LL, RL);
3732           AddToWorklist(ORNode.getNode());
3733           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3734         }
3735       }
3736       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3737       // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
3738       if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3739         EVT CCVT = getSetCCResultType(LR.getValueType());
3740         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3741           SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3742                                         LR.getValueType(), LL, RL);
3743           AddToWorklist(ANDNode.getNode());
3744           return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
3745         }
3746       }
3747     }
3748     // canonicalize equivalent to ll == rl
3749     if (LL == RR && LR == RL) {
3750       Op1 = ISD::getSetCCSwappedOperands(Op1);
3751       std::swap(RL, RR);
3752     }
3753     if (LL == RL && LR == RR) {
3754       bool isInteger = LL.getValueType().isInteger();
3755       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
3756       if (Result != ISD::SETCC_INVALID &&
3757           (!LegalOperations ||
3758            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3759             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
3760         EVT CCVT = getSetCCResultType(LL.getValueType());
3761         if (N0.getValueType() == CCVT ||
3762             (!LegalOperations && N0.getValueType() == MVT::i1))
3763           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3764                               LL, LR, Result);
3765       }
3766     }
3767   }
3768 
3769   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
3770   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3771       // Don't increase # computations.
3772       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3773     // We can only do this xform if we know that bits from X that are set in C2
3774     // but not in C1 are already zero.  Likewise for Y.
3775     if (const ConstantSDNode *N0O1C =
3776         getAsNonOpaqueConstant(N0.getOperand(1))) {
3777       if (const ConstantSDNode *N1O1C =
3778           getAsNonOpaqueConstant(N1.getOperand(1))) {
3779         // We can only do this xform if we know that bits from X that are set in
3780         // C2 but not in C1 are already zero.  Likewise for Y.
3781         const APInt &LHSMask = N0O1C->getAPIntValue();
3782         const APInt &RHSMask = N1O1C->getAPIntValue();
3783 
3784         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
3785             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
3786           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3787                                   N0.getOperand(0), N1.getOperand(0));
3788           SDLoc DL(LocReference);
3789           return DAG.getNode(ISD::AND, DL, VT, X,
3790                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
3791         }
3792       }
3793     }
3794   }
3795 
3796   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
3797   if (N0.getOpcode() == ISD::AND &&
3798       N1.getOpcode() == ISD::AND &&
3799       N0.getOperand(0) == N1.getOperand(0) &&
3800       // Don't increase # computations.
3801       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3802     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3803                             N0.getOperand(1), N1.getOperand(1));
3804     return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
3805   }
3806 
3807   return SDValue();
3808 }
3809 
3810 SDValue DAGCombiner::visitOR(SDNode *N) {
3811   SDValue N0 = N->getOperand(0);
3812   SDValue N1 = N->getOperand(1);
3813   EVT VT = N1.getValueType();
3814 
3815   // fold vector ops
3816   if (VT.isVector()) {
3817     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3818       return FoldedVOp;
3819 
3820     // fold (or x, 0) -> x, vector edition
3821     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3822       return N1;
3823     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3824       return N0;
3825 
3826     // fold (or x, -1) -> -1, vector edition
3827     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3828       // do not return N0, because undef node may exist in N0
3829       return DAG.getConstant(
3830           APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
3831           N0.getValueType());
3832     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3833       // do not return N1, because undef node may exist in N1
3834       return DAG.getConstant(
3835           APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
3836           N1.getValueType());
3837 
3838     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
3839     // Do this only if the resulting shuffle is legal.
3840     if (isa<ShuffleVectorSDNode>(N0) &&
3841         isa<ShuffleVectorSDNode>(N1) &&
3842         // Avoid folding a node with illegal type.
3843         TLI.isTypeLegal(VT)) {
3844       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
3845       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
3846       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
3847       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
3848       // Ensure both shuffles have a zero input.
3849       if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
3850         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
3851         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
3852         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
3853         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
3854         bool CanFold = true;
3855         int NumElts = VT.getVectorNumElements();
3856         SmallVector<int, 4> Mask(NumElts);
3857 
3858         for (int i = 0; i != NumElts; ++i) {
3859           int M0 = SV0->getMaskElt(i);
3860           int M1 = SV1->getMaskElt(i);
3861 
3862           // Determine if either index is pointing to a zero vector.
3863           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
3864           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
3865 
3866           // If one element is zero and the otherside is undef, keep undef.
3867           // This also handles the case that both are undef.
3868           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
3869             Mask[i] = -1;
3870             continue;
3871           }
3872 
3873           // Make sure only one of the elements is zero.
3874           if (M0Zero == M1Zero) {
3875             CanFold = false;
3876             break;
3877           }
3878 
3879           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
3880 
3881           // We have a zero and non-zero element. If the non-zero came from
3882           // SV0 make the index a LHS index. If it came from SV1, make it
3883           // a RHS index. We need to mod by NumElts because we don't care
3884           // which operand it came from in the original shuffles.
3885           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
3886         }
3887 
3888         if (CanFold) {
3889           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
3890           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
3891 
3892           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
3893           if (!LegalMask) {
3894             std::swap(NewLHS, NewRHS);
3895             ShuffleVectorSDNode::commuteMask(Mask);
3896             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
3897           }
3898 
3899           if (LegalMask)
3900             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
3901         }
3902       }
3903     }
3904   }
3905 
3906   // fold (or c1, c2) -> c1|c2
3907   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3908   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3909   if (N0C && N1C && !N1C->isOpaque())
3910     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
3911   // canonicalize constant to RHS
3912   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3913      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3914     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
3915   // fold (or x, 0) -> x
3916   if (isNullConstant(N1))
3917     return N0;
3918   // fold (or x, -1) -> -1
3919   if (isAllOnesConstant(N1))
3920     return N1;
3921   // fold (or x, c) -> c iff (x & ~c) == 0
3922   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
3923     return N1;
3924 
3925   if (SDValue Combined = visitORLike(N0, N1, N))
3926     return Combined;
3927 
3928   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
3929   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
3930     return BSwap;
3931   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
3932     return BSwap;
3933 
3934   // reassociate or
3935   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
3936     return ROR;
3937   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
3938   // iff (c1 & c2) == 0.
3939   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
3940              isa<ConstantSDNode>(N0.getOperand(1))) {
3941     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
3942     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
3943       if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
3944                                                    N1C, C1))
3945         return DAG.getNode(
3946             ISD::AND, SDLoc(N), VT,
3947             DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
3948       return SDValue();
3949     }
3950   }
3951   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
3952   if (N0.getOpcode() == N1.getOpcode())
3953     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3954       return Tmp;
3955 
3956   // See if this is some rotate idiom.
3957   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
3958     return SDValue(Rot, 0);
3959 
3960   // Simplify the operands using demanded-bits information.
3961   if (!VT.isVector() &&
3962       SimplifyDemandedBits(SDValue(N, 0)))
3963     return SDValue(N, 0);
3964 
3965   return SDValue();
3966 }
3967 
3968 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
3969 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3970   if (Op.getOpcode() == ISD::AND) {
3971     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
3972       Mask = Op.getOperand(1);
3973       Op = Op.getOperand(0);
3974     } else {
3975       return false;
3976     }
3977   }
3978 
3979   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
3980     Shift = Op;
3981     return true;
3982   }
3983 
3984   return false;
3985 }
3986 
3987 // Return true if we can prove that, whenever Neg and Pos are both in the
3988 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
3989 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
3990 //
3991 //     (or (shift1 X, Neg), (shift2 X, Pos))
3992 //
3993 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
3994 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
3995 // to consider shift amounts with defined behavior.
3996 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
3997   // If EltSize is a power of 2 then:
3998   //
3999   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4000   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4001   //
4002   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4003   // for the stronger condition:
4004   //
4005   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4006   //
4007   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4008   // we can just replace Neg with Neg' for the rest of the function.
4009   //
4010   // In other cases we check for the even stronger condition:
4011   //
4012   //     Neg == EltSize - Pos                                    [B]
4013   //
4014   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4015   // behavior if Pos == 0 (and consequently Neg == EltSize).
4016   //
4017   // We could actually use [A] whenever EltSize is a power of 2, but the
4018   // only extra cases that it would match are those uninteresting ones
4019   // where Neg and Pos are never in range at the same time.  E.g. for
4020   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4021   // as well as (sub 32, Pos), but:
4022   //
4023   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4024   //
4025   // always invokes undefined behavior for 32-bit X.
4026   //
4027   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4028   unsigned MaskLoBits = 0;
4029   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4030     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4031       if (NegC->getAPIntValue() == EltSize - 1) {
4032         Neg = Neg.getOperand(0);
4033         MaskLoBits = Log2_64(EltSize);
4034       }
4035     }
4036   }
4037 
4038   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4039   if (Neg.getOpcode() != ISD::SUB)
4040     return false;
4041   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4042   if (!NegC)
4043     return false;
4044   SDValue NegOp1 = Neg.getOperand(1);
4045 
4046   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4047   // Pos'.  The truncation is redundant for the purpose of the equality.
4048   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4049     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4050       if (PosC->getAPIntValue() == EltSize - 1)
4051         Pos = Pos.getOperand(0);
4052 
4053   // The condition we need is now:
4054   //
4055   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4056   //
4057   // If NegOp1 == Pos then we need:
4058   //
4059   //              EltSize & Mask == NegC & Mask
4060   //
4061   // (because "x & Mask" is a truncation and distributes through subtraction).
4062   APInt Width;
4063   if (Pos == NegOp1)
4064     Width = NegC->getAPIntValue();
4065 
4066   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4067   // Then the condition we want to prove becomes:
4068   //
4069   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4070   //
4071   // which, again because "x & Mask" is a truncation, becomes:
4072   //
4073   //                NegC & Mask == (EltSize - PosC) & Mask
4074   //             EltSize & Mask == (NegC + PosC) & Mask
4075   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4076     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4077       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4078     else
4079       return false;
4080   } else
4081     return false;
4082 
4083   // Now we just need to check that EltSize & Mask == Width & Mask.
4084   if (MaskLoBits)
4085     // EltSize & Mask is 0 since Mask is EltSize - 1.
4086     return Width.getLoBits(MaskLoBits) == 0;
4087   return Width == EltSize;
4088 }
4089 
4090 // A subroutine of MatchRotate used once we have found an OR of two opposite
4091 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4092 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4093 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4094 // Neg with outer conversions stripped away.
4095 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4096                                        SDValue Neg, SDValue InnerPos,
4097                                        SDValue InnerNeg, unsigned PosOpcode,
4098                                        unsigned NegOpcode, const SDLoc &DL) {
4099   // fold (or (shl x, (*ext y)),
4100   //          (srl x, (*ext (sub 32, y)))) ->
4101   //   (rotl x, y) or (rotr x, (sub 32, y))
4102   //
4103   // fold (or (shl x, (*ext (sub 32, y))),
4104   //          (srl x, (*ext y))) ->
4105   //   (rotr x, y) or (rotl x, (sub 32, y))
4106   EVT VT = Shifted.getValueType();
4107   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4108     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4109     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4110                        HasPos ? Pos : Neg).getNode();
4111   }
4112 
4113   return nullptr;
4114 }
4115 
4116 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4117 // idioms for rotate, and if the target supports rotation instructions, generate
4118 // a rot[lr].
4119 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4120   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4121   EVT VT = LHS.getValueType();
4122   if (!TLI.isTypeLegal(VT)) return nullptr;
4123 
4124   // The target must have at least one rotate flavor.
4125   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4126   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4127   if (!HasROTL && !HasROTR) return nullptr;
4128 
4129   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4130   SDValue LHSShift;   // The shift.
4131   SDValue LHSMask;    // AND value if any.
4132   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4133     return nullptr; // Not part of a rotate.
4134 
4135   SDValue RHSShift;   // The shift.
4136   SDValue RHSMask;    // AND value if any.
4137   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4138     return nullptr; // Not part of a rotate.
4139 
4140   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4141     return nullptr;   // Not shifting the same value.
4142 
4143   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4144     return nullptr;   // Shifts must disagree.
4145 
4146   // Canonicalize shl to left side in a shl/srl pair.
4147   if (RHSShift.getOpcode() == ISD::SHL) {
4148     std::swap(LHS, RHS);
4149     std::swap(LHSShift, RHSShift);
4150     std::swap(LHSMask, RHSMask);
4151   }
4152 
4153   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4154   SDValue LHSShiftArg = LHSShift.getOperand(0);
4155   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4156   SDValue RHSShiftArg = RHSShift.getOperand(0);
4157   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4158 
4159   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4160   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4161   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
4162     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
4163     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
4164     if ((LShVal + RShVal) != EltSizeInBits)
4165       return nullptr;
4166 
4167     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4168                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4169 
4170     // If there is an AND of either shifted operand, apply it to the result.
4171     if (LHSMask.getNode() || RHSMask.getNode()) {
4172       APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
4173       SDValue Mask = DAG.getConstant(AllBits, DL, VT);
4174 
4175       if (LHSMask.getNode()) {
4176         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
4177         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4178                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
4179                                        DAG.getConstant(RHSBits, DL, VT)));
4180       }
4181       if (RHSMask.getNode()) {
4182         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
4183         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4184                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
4185                                        DAG.getConstant(LHSBits, DL, VT)));
4186       }
4187 
4188       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4189     }
4190 
4191     return Rot.getNode();
4192   }
4193 
4194   // If there is a mask here, and we have a variable shift, we can't be sure
4195   // that we're masking out the right stuff.
4196   if (LHSMask.getNode() || RHSMask.getNode())
4197     return nullptr;
4198 
4199   // If the shift amount is sign/zext/any-extended just peel it off.
4200   SDValue LExtOp0 = LHSShiftAmt;
4201   SDValue RExtOp0 = RHSShiftAmt;
4202   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4203        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4204        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4205        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4206       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4207        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4208        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4209        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4210     LExtOp0 = LHSShiftAmt.getOperand(0);
4211     RExtOp0 = RHSShiftAmt.getOperand(0);
4212   }
4213 
4214   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4215                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4216   if (TryL)
4217     return TryL;
4218 
4219   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4220                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4221   if (TryR)
4222     return TryR;
4223 
4224   return nullptr;
4225 }
4226 
4227 SDValue DAGCombiner::visitXOR(SDNode *N) {
4228   SDValue N0 = N->getOperand(0);
4229   SDValue N1 = N->getOperand(1);
4230   EVT VT = N0.getValueType();
4231 
4232   // fold vector ops
4233   if (VT.isVector()) {
4234     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4235       return FoldedVOp;
4236 
4237     // fold (xor x, 0) -> x, vector edition
4238     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4239       return N1;
4240     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4241       return N0;
4242   }
4243 
4244   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
4245   if (N0.isUndef() && N1.isUndef())
4246     return DAG.getConstant(0, SDLoc(N), VT);
4247   // fold (xor x, undef) -> undef
4248   if (N0.isUndef())
4249     return N0;
4250   if (N1.isUndef())
4251     return N1;
4252   // fold (xor c1, c2) -> c1^c2
4253   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4254   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4255   if (N0C && N1C)
4256     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
4257   // canonicalize constant to RHS
4258   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4259      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4260     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
4261   // fold (xor x, 0) -> x
4262   if (isNullConstant(N1))
4263     return N0;
4264   // reassociate xor
4265   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
4266     return RXOR;
4267 
4268   // fold !(x cc y) -> (x !cc y)
4269   SDValue LHS, RHS, CC;
4270   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
4271     bool isInt = LHS.getValueType().isInteger();
4272     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
4273                                                isInt);
4274 
4275     if (!LegalOperations ||
4276         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
4277       switch (N0.getOpcode()) {
4278       default:
4279         llvm_unreachable("Unhandled SetCC Equivalent!");
4280       case ISD::SETCC:
4281         return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
4282       case ISD::SELECT_CC:
4283         return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
4284                                N0.getOperand(3), NotCC);
4285       }
4286     }
4287   }
4288 
4289   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
4290   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
4291       N0.getNode()->hasOneUse() &&
4292       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
4293     SDValue V = N0.getOperand(0);
4294     SDLoc DL(N0);
4295     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
4296                     DAG.getConstant(1, DL, V.getValueType()));
4297     AddToWorklist(V.getNode());
4298     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
4299   }
4300 
4301   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
4302   if (isOneConstant(N1) && VT == MVT::i1 &&
4303       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4304     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4305     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
4306       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4307       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4308       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4309       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4310       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4311     }
4312   }
4313   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
4314   if (isAllOnesConstant(N1) &&
4315       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4316     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4317     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
4318       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4319       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4320       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4321       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4322       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4323     }
4324   }
4325   // fold (xor (and x, y), y) -> (and (not x), y)
4326   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4327       N0->getOperand(1) == N1) {
4328     SDValue X = N0->getOperand(0);
4329     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
4330     AddToWorklist(NotX.getNode());
4331     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
4332   }
4333   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
4334   if (N1C && N0.getOpcode() == ISD::XOR) {
4335     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
4336       SDLoc DL(N);
4337       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4338                          DAG.getConstant(N1C->getAPIntValue() ^
4339                                          N00C->getAPIntValue(), DL, VT));
4340     }
4341     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
4342       SDLoc DL(N);
4343       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
4344                          DAG.getConstant(N1C->getAPIntValue() ^
4345                                          N01C->getAPIntValue(), DL, VT));
4346     }
4347   }
4348   // fold (xor x, x) -> 0
4349   if (N0 == N1)
4350     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
4351 
4352   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
4353   // Here is a concrete example of this equivalence:
4354   // i16   x ==  14
4355   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
4356   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
4357   //
4358   // =>
4359   //
4360   // i16     ~1      == 0b1111111111111110
4361   // i16 rol(~1, 14) == 0b1011111111111111
4362   //
4363   // Some additional tips to help conceptualize this transform:
4364   // - Try to see the operation as placing a single zero in a value of all ones.
4365   // - There exists no value for x which would allow the result to contain zero.
4366   // - Values of x larger than the bitwidth are undefined and do not require a
4367   //   consistent result.
4368   // - Pushing the zero left requires shifting one bits in from the right.
4369   // A rotate left of ~1 is a nice way of achieving the desired result.
4370   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
4371       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
4372     SDLoc DL(N);
4373     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
4374                        N0.getOperand(1));
4375   }
4376 
4377   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
4378   if (N0.getOpcode() == N1.getOpcode())
4379     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4380       return Tmp;
4381 
4382   // Simplify the expression using non-local knowledge.
4383   if (!VT.isVector() &&
4384       SimplifyDemandedBits(SDValue(N, 0)))
4385     return SDValue(N, 0);
4386 
4387   return SDValue();
4388 }
4389 
4390 /// Handle transforms common to the three shifts, when the shift amount is a
4391 /// constant.
4392 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
4393   SDNode *LHS = N->getOperand(0).getNode();
4394   if (!LHS->hasOneUse()) return SDValue();
4395 
4396   // We want to pull some binops through shifts, so that we have (and (shift))
4397   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
4398   // thing happens with address calculations, so it's important to canonicalize
4399   // it.
4400   bool HighBitSet = false;  // Can we transform this if the high bit is set?
4401 
4402   switch (LHS->getOpcode()) {
4403   default: return SDValue();
4404   case ISD::OR:
4405   case ISD::XOR:
4406     HighBitSet = false; // We can only transform sra if the high bit is clear.
4407     break;
4408   case ISD::AND:
4409     HighBitSet = true;  // We can only transform sra if the high bit is set.
4410     break;
4411   case ISD::ADD:
4412     if (N->getOpcode() != ISD::SHL)
4413       return SDValue(); // only shl(add) not sr[al](add).
4414     HighBitSet = false; // We can only transform sra if the high bit is clear.
4415     break;
4416   }
4417 
4418   // We require the RHS of the binop to be a constant and not opaque as well.
4419   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
4420   if (!BinOpCst) return SDValue();
4421 
4422   // FIXME: disable this unless the input to the binop is a shift by a constant.
4423   // If it is not a shift, it pessimizes some common cases like:
4424   //
4425   //    void foo(int *X, int i) { X[i & 1235] = 1; }
4426   //    int bar(int *X, int i) { return X[i & 255]; }
4427   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
4428   if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
4429        BinOpLHSVal->getOpcode() != ISD::SRA &&
4430        BinOpLHSVal->getOpcode() != ISD::SRL) ||
4431       !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
4432     return SDValue();
4433 
4434   EVT VT = N->getValueType(0);
4435 
4436   // If this is a signed shift right, and the high bit is modified by the
4437   // logical operation, do not perform the transformation. The highBitSet
4438   // boolean indicates the value of the high bit of the constant which would
4439   // cause it to be modified for this operation.
4440   if (N->getOpcode() == ISD::SRA) {
4441     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
4442     if (BinOpRHSSignSet != HighBitSet)
4443       return SDValue();
4444   }
4445 
4446   if (!TLI.isDesirableToCommuteWithShift(LHS))
4447     return SDValue();
4448 
4449   // Fold the constants, shifting the binop RHS by the shift amount.
4450   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
4451                                N->getValueType(0),
4452                                LHS->getOperand(1), N->getOperand(1));
4453   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
4454 
4455   // Create the new shift.
4456   SDValue NewShift = DAG.getNode(N->getOpcode(),
4457                                  SDLoc(LHS->getOperand(0)),
4458                                  VT, LHS->getOperand(0), N->getOperand(1));
4459 
4460   // Create the new binop.
4461   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
4462 }
4463 
4464 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
4465   assert(N->getOpcode() == ISD::TRUNCATE);
4466   assert(N->getOperand(0).getOpcode() == ISD::AND);
4467 
4468   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
4469   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
4470     SDValue N01 = N->getOperand(0).getOperand(1);
4471 
4472     if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
4473       if (!N01C->isOpaque()) {
4474         EVT TruncVT = N->getValueType(0);
4475         SDValue N00 = N->getOperand(0).getOperand(0);
4476         APInt TruncC = N01C->getAPIntValue();
4477         TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
4478         SDLoc DL(N);
4479 
4480         return DAG.getNode(ISD::AND, DL, TruncVT,
4481                            DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
4482                            DAG.getConstant(TruncC, DL, TruncVT));
4483       }
4484     }
4485   }
4486 
4487   return SDValue();
4488 }
4489 
4490 SDValue DAGCombiner::visitRotate(SDNode *N) {
4491   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
4492   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
4493       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
4494     if (SDValue NewOp1 =
4495             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
4496       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
4497                          N->getOperand(0), NewOp1);
4498   }
4499   return SDValue();
4500 }
4501 
4502 SDValue DAGCombiner::visitSHL(SDNode *N) {
4503   SDValue N0 = N->getOperand(0);
4504   SDValue N1 = N->getOperand(1);
4505   EVT VT = N0.getValueType();
4506   unsigned OpSizeInBits = VT.getScalarSizeInBits();
4507 
4508   // fold vector ops
4509   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4510   if (VT.isVector()) {
4511     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4512       return FoldedVOp;
4513 
4514     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
4515     // If setcc produces all-one true value then:
4516     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
4517     if (N1CV && N1CV->isConstant()) {
4518       if (N0.getOpcode() == ISD::AND) {
4519         SDValue N00 = N0->getOperand(0);
4520         SDValue N01 = N0->getOperand(1);
4521         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
4522 
4523         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
4524             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
4525                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4526           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
4527                                                      N01CV, N1CV))
4528             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
4529         }
4530       } else {
4531         N1C = isConstOrConstSplat(N1);
4532       }
4533     }
4534   }
4535 
4536   // fold (shl c1, c2) -> c1<<c2
4537   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4538   if (N0C && N1C && !N1C->isOpaque())
4539     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
4540   // fold (shl 0, x) -> 0
4541   if (isNullConstant(N0))
4542     return N0;
4543   // fold (shl x, c >= size(x)) -> undef
4544   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4545     return DAG.getUNDEF(VT);
4546   // fold (shl x, 0) -> x
4547   if (N1C && N1C->isNullValue())
4548     return N0;
4549   // fold (shl undef, x) -> 0
4550   if (N0.isUndef())
4551     return DAG.getConstant(0, SDLoc(N), VT);
4552   // if (shl x, c) is known to be zero, return 0
4553   if (DAG.MaskedValueIsZero(SDValue(N, 0),
4554                             APInt::getAllOnesValue(OpSizeInBits)))
4555     return DAG.getConstant(0, SDLoc(N), VT);
4556   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
4557   if (N1.getOpcode() == ISD::TRUNCATE &&
4558       N1.getOperand(0).getOpcode() == ISD::AND) {
4559     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
4560       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
4561   }
4562 
4563   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4564     return SDValue(N, 0);
4565 
4566   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
4567   if (N1C && N0.getOpcode() == ISD::SHL) {
4568     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4569       SDLoc DL(N);
4570       APInt c1 = N0C1->getAPIntValue();
4571       APInt c2 = N1C->getAPIntValue();
4572       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4573 
4574       APInt Sum = c1 + c2;
4575       if (Sum.uge(OpSizeInBits))
4576         return DAG.getConstant(0, DL, VT);
4577 
4578       return DAG.getNode(
4579           ISD::SHL, DL, VT, N0.getOperand(0),
4580           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4581     }
4582   }
4583 
4584   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
4585   // For this to be valid, the second form must not preserve any of the bits
4586   // that are shifted out by the inner shift in the first form.  This means
4587   // the outer shift size must be >= the number of bits added by the ext.
4588   // As a corollary, we don't care what kind of ext it is.
4589   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
4590               N0.getOpcode() == ISD::ANY_EXTEND ||
4591               N0.getOpcode() == ISD::SIGN_EXTEND) &&
4592       N0.getOperand(0).getOpcode() == ISD::SHL) {
4593     SDValue N0Op0 = N0.getOperand(0);
4594     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4595       APInt c1 = N0Op0C1->getAPIntValue();
4596       APInt c2 = N1C->getAPIntValue();
4597       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4598 
4599       EVT InnerShiftVT = N0Op0.getValueType();
4600       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
4601       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
4602         SDLoc DL(N0);
4603         APInt Sum = c1 + c2;
4604         if (Sum.uge(OpSizeInBits))
4605           return DAG.getConstant(0, DL, VT);
4606 
4607         return DAG.getNode(
4608             ISD::SHL, DL, VT,
4609             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
4610             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4611       }
4612     }
4613   }
4614 
4615   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
4616   // Only fold this if the inner zext has no other uses to avoid increasing
4617   // the total number of instructions.
4618   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
4619       N0.getOperand(0).getOpcode() == ISD::SRL) {
4620     SDValue N0Op0 = N0.getOperand(0);
4621     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4622       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
4623         uint64_t c1 = N0Op0C1->getZExtValue();
4624         uint64_t c2 = N1C->getZExtValue();
4625         if (c1 == c2) {
4626           SDValue NewOp0 = N0.getOperand(0);
4627           EVT CountVT = NewOp0.getOperand(1).getValueType();
4628           SDLoc DL(N);
4629           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
4630                                        NewOp0,
4631                                        DAG.getConstant(c2, DL, CountVT));
4632           AddToWorklist(NewSHL.getNode());
4633           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
4634         }
4635       }
4636     }
4637   }
4638 
4639   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
4640   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
4641   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
4642       cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
4643     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4644       uint64_t C1 = N0C1->getZExtValue();
4645       uint64_t C2 = N1C->getZExtValue();
4646       SDLoc DL(N);
4647       if (C1 <= C2)
4648         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4649                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
4650       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
4651                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
4652     }
4653   }
4654 
4655   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
4656   //                               (and (srl x, (sub c1, c2), MASK)
4657   // Only fold this if the inner shift has no other uses -- if it does, folding
4658   // this will increase the total number of instructions.
4659   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4660     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4661       uint64_t c1 = N0C1->getZExtValue();
4662       if (c1 < OpSizeInBits) {
4663         uint64_t c2 = N1C->getZExtValue();
4664         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
4665         SDValue Shift;
4666         if (c2 > c1) {
4667           Mask = Mask.shl(c2 - c1);
4668           SDLoc DL(N);
4669           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4670                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
4671         } else {
4672           Mask = Mask.lshr(c1 - c2);
4673           SDLoc DL(N);
4674           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4675                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
4676         }
4677         SDLoc DL(N0);
4678         return DAG.getNode(ISD::AND, DL, VT, Shift,
4679                            DAG.getConstant(Mask, DL, VT));
4680       }
4681     }
4682   }
4683   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
4684   if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
4685     unsigned BitSize = VT.getScalarSizeInBits();
4686     SDLoc DL(N);
4687     SDValue HiBitsMask =
4688       DAG.getConstant(APInt::getHighBitsSet(BitSize,
4689                                             BitSize - N1C->getZExtValue()),
4690                       DL, VT);
4691     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
4692                        HiBitsMask);
4693   }
4694 
4695   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
4696   // Variant of version done on multiply, except mul by a power of 2 is turned
4697   // into a shift.
4698   APInt Val;
4699   if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
4700       (isa<ConstantSDNode>(N0.getOperand(1)) ||
4701        ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
4702     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
4703     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
4704     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
4705   }
4706 
4707   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
4708   if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
4709     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4710       if (SDValue Folded =
4711               DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
4712         return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
4713     }
4714   }
4715 
4716   if (N1C && !N1C->isOpaque())
4717     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
4718       return NewSHL;
4719 
4720   return SDValue();
4721 }
4722 
4723 SDValue DAGCombiner::visitSRA(SDNode *N) {
4724   SDValue N0 = N->getOperand(0);
4725   SDValue N1 = N->getOperand(1);
4726   EVT VT = N0.getValueType();
4727   unsigned OpSizeInBits = VT.getScalarSizeInBits();
4728 
4729   // fold vector ops
4730   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4731   if (VT.isVector()) {
4732     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4733       return FoldedVOp;
4734 
4735     N1C = isConstOrConstSplat(N1);
4736   }
4737 
4738   // fold (sra c1, c2) -> (sra c1, c2)
4739   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4740   if (N0C && N1C && !N1C->isOpaque())
4741     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
4742   // fold (sra 0, x) -> 0
4743   if (isNullConstant(N0))
4744     return N0;
4745   // fold (sra -1, x) -> -1
4746   if (isAllOnesConstant(N0))
4747     return N0;
4748   // fold (sra x, c >= size(x)) -> undef
4749   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4750     return DAG.getUNDEF(VT);
4751   // fold (sra x, 0) -> x
4752   if (N1C && N1C->isNullValue())
4753     return N0;
4754   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
4755   // sext_inreg.
4756   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
4757     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
4758     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
4759     if (VT.isVector())
4760       ExtVT = EVT::getVectorVT(*DAG.getContext(),
4761                                ExtVT, VT.getVectorNumElements());
4762     if ((!LegalOperations ||
4763          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
4764       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
4765                          N0.getOperand(0), DAG.getValueType(ExtVT));
4766   }
4767 
4768   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
4769   if (N1C && N0.getOpcode() == ISD::SRA) {
4770     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4771       SDLoc DL(N);
4772       APInt c1 = N0C1->getAPIntValue();
4773       APInt c2 = N1C->getAPIntValue();
4774       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4775 
4776       APInt Sum = c1 + c2;
4777       if (Sum.uge(OpSizeInBits))
4778         Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
4779 
4780       return DAG.getNode(
4781           ISD::SRA, DL, VT, N0.getOperand(0),
4782           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4783     }
4784   }
4785 
4786   // fold (sra (shl X, m), (sub result_size, n))
4787   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
4788   // result_size - n != m.
4789   // If truncate is free for the target sext(shl) is likely to result in better
4790   // code.
4791   if (N0.getOpcode() == ISD::SHL && N1C) {
4792     // Get the two constanst of the shifts, CN0 = m, CN = n.
4793     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
4794     if (N01C) {
4795       LLVMContext &Ctx = *DAG.getContext();
4796       // Determine what the truncate's result bitsize and type would be.
4797       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
4798 
4799       if (VT.isVector())
4800         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
4801 
4802       // Determine the residual right-shift amount.
4803       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
4804 
4805       // If the shift is not a no-op (in which case this should be just a sign
4806       // extend already), the truncated to type is legal, sign_extend is legal
4807       // on that type, and the truncate to that type is both legal and free,
4808       // perform the transform.
4809       if ((ShiftAmt > 0) &&
4810           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
4811           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
4812           TLI.isTruncateFree(VT, TruncVT)) {
4813 
4814         SDLoc DL(N);
4815         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
4816             getShiftAmountTy(N0.getOperand(0).getValueType()));
4817         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
4818                                     N0.getOperand(0), Amt);
4819         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
4820                                     Shift);
4821         return DAG.getNode(ISD::SIGN_EXTEND, DL,
4822                            N->getValueType(0), Trunc);
4823       }
4824     }
4825   }
4826 
4827   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
4828   if (N1.getOpcode() == ISD::TRUNCATE &&
4829       N1.getOperand(0).getOpcode() == ISD::AND) {
4830     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
4831       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
4832   }
4833 
4834   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
4835   //      if c1 is equal to the number of bits the trunc removes
4836   if (N0.getOpcode() == ISD::TRUNCATE &&
4837       (N0.getOperand(0).getOpcode() == ISD::SRL ||
4838        N0.getOperand(0).getOpcode() == ISD::SRA) &&
4839       N0.getOperand(0).hasOneUse() &&
4840       N0.getOperand(0).getOperand(1).hasOneUse() &&
4841       N1C) {
4842     SDValue N0Op0 = N0.getOperand(0);
4843     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
4844       unsigned LargeShiftVal = LargeShift->getZExtValue();
4845       EVT LargeVT = N0Op0.getValueType();
4846 
4847       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
4848         SDLoc DL(N);
4849         SDValue Amt =
4850           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
4851                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
4852         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
4853                                   N0Op0.getOperand(0), Amt);
4854         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
4855       }
4856     }
4857   }
4858 
4859   // Simplify, based on bits shifted out of the LHS.
4860   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4861     return SDValue(N, 0);
4862 
4863 
4864   // If the sign bit is known to be zero, switch this to a SRL.
4865   if (DAG.SignBitIsZero(N0))
4866     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
4867 
4868   if (N1C && !N1C->isOpaque())
4869     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
4870       return NewSRA;
4871 
4872   return SDValue();
4873 }
4874 
4875 SDValue DAGCombiner::visitSRL(SDNode *N) {
4876   SDValue N0 = N->getOperand(0);
4877   SDValue N1 = N->getOperand(1);
4878   EVT VT = N0.getValueType();
4879   unsigned OpSizeInBits = VT.getScalarSizeInBits();
4880 
4881   // fold vector ops
4882   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4883   if (VT.isVector()) {
4884     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4885       return FoldedVOp;
4886 
4887     N1C = isConstOrConstSplat(N1);
4888   }
4889 
4890   // fold (srl c1, c2) -> c1 >>u c2
4891   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4892   if (N0C && N1C && !N1C->isOpaque())
4893     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
4894   // fold (srl 0, x) -> 0
4895   if (isNullConstant(N0))
4896     return N0;
4897   // fold (srl x, c >= size(x)) -> undef
4898   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4899     return DAG.getUNDEF(VT);
4900   // fold (srl x, 0) -> x
4901   if (N1C && N1C->isNullValue())
4902     return N0;
4903   // if (srl x, c) is known to be zero, return 0
4904   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4905                                    APInt::getAllOnesValue(OpSizeInBits)))
4906     return DAG.getConstant(0, SDLoc(N), VT);
4907 
4908   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
4909   if (N1C && N0.getOpcode() == ISD::SRL) {
4910     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4911       SDLoc DL(N);
4912       APInt c1 = N0C1->getAPIntValue();
4913       APInt c2 = N1C->getAPIntValue();
4914       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4915 
4916       APInt Sum = c1 + c2;
4917       if (Sum.uge(OpSizeInBits))
4918         return DAG.getConstant(0, DL, VT);
4919 
4920       return DAG.getNode(
4921           ISD::SRL, DL, VT, N0.getOperand(0),
4922           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4923     }
4924   }
4925 
4926   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
4927   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
4928       N0.getOperand(0).getOpcode() == ISD::SRL &&
4929       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
4930     uint64_t c1 =
4931       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
4932     uint64_t c2 = N1C->getZExtValue();
4933     EVT InnerShiftVT = N0.getOperand(0).getValueType();
4934     EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
4935     uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
4936     // This is only valid if the OpSizeInBits + c1 = size of inner shift.
4937     if (c1 + OpSizeInBits == InnerShiftSize) {
4938       SDLoc DL(N0);
4939       if (c1 + c2 >= InnerShiftSize)
4940         return DAG.getConstant(0, DL, VT);
4941       return DAG.getNode(ISD::TRUNCATE, DL, VT,
4942                          DAG.getNode(ISD::SRL, DL, InnerShiftVT,
4943                                      N0.getOperand(0)->getOperand(0),
4944                                      DAG.getConstant(c1 + c2, DL,
4945                                                      ShiftCountVT)));
4946     }
4947   }
4948 
4949   // fold (srl (shl x, c), c) -> (and x, cst2)
4950   if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
4951     unsigned BitSize = N0.getScalarValueSizeInBits();
4952     if (BitSize <= 64) {
4953       uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
4954       SDLoc DL(N);
4955       return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
4956                          DAG.getConstant(~0ULL >> ShAmt, DL, VT));
4957     }
4958   }
4959 
4960   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
4961   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4962     // Shifting in all undef bits?
4963     EVT SmallVT = N0.getOperand(0).getValueType();
4964     unsigned BitSize = SmallVT.getScalarSizeInBits();
4965     if (N1C->getZExtValue() >= BitSize)
4966       return DAG.getUNDEF(VT);
4967 
4968     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
4969       uint64_t ShiftAmt = N1C->getZExtValue();
4970       SDLoc DL0(N0);
4971       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
4972                                        N0.getOperand(0),
4973                           DAG.getConstant(ShiftAmt, DL0,
4974                                           getShiftAmountTy(SmallVT)));
4975       AddToWorklist(SmallShift.getNode());
4976       APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
4977       SDLoc DL(N);
4978       return DAG.getNode(ISD::AND, DL, VT,
4979                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
4980                          DAG.getConstant(Mask, DL, VT));
4981     }
4982   }
4983 
4984   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
4985   // bit, which is unmodified by sra.
4986   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
4987     if (N0.getOpcode() == ISD::SRA)
4988       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
4989   }
4990 
4991   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
4992   if (N1C && N0.getOpcode() == ISD::CTLZ &&
4993       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
4994     APInt KnownZero, KnownOne;
4995     DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
4996 
4997     // If any of the input bits are KnownOne, then the input couldn't be all
4998     // zeros, thus the result of the srl will always be zero.
4999     if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5000 
5001     // If all of the bits input the to ctlz node are known to be zero, then
5002     // the result of the ctlz is "32" and the result of the shift is one.
5003     APInt UnknownBits = ~KnownZero;
5004     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5005 
5006     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5007     if ((UnknownBits & (UnknownBits - 1)) == 0) {
5008       // Okay, we know that only that the single bit specified by UnknownBits
5009       // could be set on input to the CTLZ node. If this bit is set, the SRL
5010       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5011       // to an SRL/XOR pair, which is likely to simplify more.
5012       unsigned ShAmt = UnknownBits.countTrailingZeros();
5013       SDValue Op = N0.getOperand(0);
5014 
5015       if (ShAmt) {
5016         SDLoc DL(N0);
5017         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5018                   DAG.getConstant(ShAmt, DL,
5019                                   getShiftAmountTy(Op.getValueType())));
5020         AddToWorklist(Op.getNode());
5021       }
5022 
5023       SDLoc DL(N);
5024       return DAG.getNode(ISD::XOR, DL, VT,
5025                          Op, DAG.getConstant(1, DL, VT));
5026     }
5027   }
5028 
5029   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5030   if (N1.getOpcode() == ISD::TRUNCATE &&
5031       N1.getOperand(0).getOpcode() == ISD::AND) {
5032     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5033       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5034   }
5035 
5036   // fold operands of srl based on knowledge that the low bits are not
5037   // demanded.
5038   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5039     return SDValue(N, 0);
5040 
5041   if (N1C && !N1C->isOpaque())
5042     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5043       return NewSRL;
5044 
5045   // Attempt to convert a srl of a load into a narrower zero-extending load.
5046   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5047     return NarrowLoad;
5048 
5049   // Here is a common situation. We want to optimize:
5050   //
5051   //   %a = ...
5052   //   %b = and i32 %a, 2
5053   //   %c = srl i32 %b, 1
5054   //   brcond i32 %c ...
5055   //
5056   // into
5057   //
5058   //   %a = ...
5059   //   %b = and %a, 2
5060   //   %c = setcc eq %b, 0
5061   //   brcond %c ...
5062   //
5063   // However when after the source operand of SRL is optimized into AND, the SRL
5064   // itself may not be optimized further. Look for it and add the BRCOND into
5065   // the worklist.
5066   if (N->hasOneUse()) {
5067     SDNode *Use = *N->use_begin();
5068     if (Use->getOpcode() == ISD::BRCOND)
5069       AddToWorklist(Use);
5070     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5071       // Also look pass the truncate.
5072       Use = *Use->use_begin();
5073       if (Use->getOpcode() == ISD::BRCOND)
5074         AddToWorklist(Use);
5075     }
5076   }
5077 
5078   return SDValue();
5079 }
5080 
5081 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5082   SDValue N0 = N->getOperand(0);
5083   EVT VT = N->getValueType(0);
5084 
5085   // fold (bswap c1) -> c2
5086   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5087     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5088   // fold (bswap (bswap x)) -> x
5089   if (N0.getOpcode() == ISD::BSWAP)
5090     return N0->getOperand(0);
5091   return SDValue();
5092 }
5093 
5094 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5095   SDValue N0 = N->getOperand(0);
5096 
5097   // fold (bitreverse (bitreverse x)) -> x
5098   if (N0.getOpcode() == ISD::BITREVERSE)
5099     return N0.getOperand(0);
5100   return SDValue();
5101 }
5102 
5103 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5104   SDValue N0 = N->getOperand(0);
5105   EVT VT = N->getValueType(0);
5106 
5107   // fold (ctlz c1) -> c2
5108   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5109     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5110   return SDValue();
5111 }
5112 
5113 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5114   SDValue N0 = N->getOperand(0);
5115   EVT VT = N->getValueType(0);
5116 
5117   // fold (ctlz_zero_undef c1) -> c2
5118   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5119     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5120   return SDValue();
5121 }
5122 
5123 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5124   SDValue N0 = N->getOperand(0);
5125   EVT VT = N->getValueType(0);
5126 
5127   // fold (cttz c1) -> c2
5128   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5129     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5130   return SDValue();
5131 }
5132 
5133 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5134   SDValue N0 = N->getOperand(0);
5135   EVT VT = N->getValueType(0);
5136 
5137   // fold (cttz_zero_undef c1) -> c2
5138   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5139     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5140   return SDValue();
5141 }
5142 
5143 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5144   SDValue N0 = N->getOperand(0);
5145   EVT VT = N->getValueType(0);
5146 
5147   // fold (ctpop c1) -> c2
5148   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5149     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5150   return SDValue();
5151 }
5152 
5153 
5154 /// \brief Generate Min/Max node
5155 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5156                                    SDValue RHS, SDValue True, SDValue False,
5157                                    ISD::CondCode CC, const TargetLowering &TLI,
5158                                    SelectionDAG &DAG) {
5159   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5160     return SDValue();
5161 
5162   switch (CC) {
5163   case ISD::SETOLT:
5164   case ISD::SETOLE:
5165   case ISD::SETLT:
5166   case ISD::SETLE:
5167   case ISD::SETULT:
5168   case ISD::SETULE: {
5169     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
5170     if (TLI.isOperationLegal(Opcode, VT))
5171       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5172     return SDValue();
5173   }
5174   case ISD::SETOGT:
5175   case ISD::SETOGE:
5176   case ISD::SETGT:
5177   case ISD::SETGE:
5178   case ISD::SETUGT:
5179   case ISD::SETUGE: {
5180     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5181     if (TLI.isOperationLegal(Opcode, VT))
5182       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5183     return SDValue();
5184   }
5185   default:
5186     return SDValue();
5187   }
5188 }
5189 
5190 // TODO: We should handle other cases of selecting between {-1,0,1} here.
5191 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
5192   SDValue Cond = N->getOperand(0);
5193   SDValue N1 = N->getOperand(1);
5194   SDValue N2 = N->getOperand(2);
5195   EVT VT = N->getValueType(0);
5196   EVT CondVT = Cond.getValueType();
5197   SDLoc DL(N);
5198 
5199   // fold (select Cond, 0, 1) -> (xor Cond, 1)
5200   // We can't do this reliably if integer based booleans have different contents
5201   // to floating point based booleans. This is because we can't tell whether we
5202   // have an integer-based boolean or a floating-point-based boolean unless we
5203   // can find the SETCC that produced it and inspect its operands. This is
5204   // fairly easy if C is the SETCC node, but it can potentially be
5205   // undiscoverable (or not reasonably discoverable). For example, it could be
5206   // in another basic block or it could require searching a complicated
5207   // expression.
5208   if (VT.isInteger() &&
5209       (CondVT == MVT::i1 || (CondVT.isInteger() &&
5210                              TLI.getBooleanContents(false, true) ==
5211                                  TargetLowering::ZeroOrOneBooleanContent &&
5212                              TLI.getBooleanContents(false, false) ==
5213                                  TargetLowering::ZeroOrOneBooleanContent)) &&
5214       isNullConstant(N1) && isOneConstant(N2)) {
5215     SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
5216                                   DAG.getConstant(1, DL, CondVT));
5217     if (VT.bitsEq(CondVT))
5218       return NotCond;
5219     return DAG.getZExtOrTrunc(NotCond, DL, VT);
5220   }
5221 
5222   return SDValue();
5223 }
5224 
5225 SDValue DAGCombiner::visitSELECT(SDNode *N) {
5226   SDValue N0 = N->getOperand(0);
5227   SDValue N1 = N->getOperand(1);
5228   SDValue N2 = N->getOperand(2);
5229   EVT VT = N->getValueType(0);
5230   EVT VT0 = N0.getValueType();
5231 
5232   // fold (select C, X, X) -> X
5233   if (N1 == N2)
5234     return N1;
5235   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
5236     // fold (select true, X, Y) -> X
5237     // fold (select false, X, Y) -> Y
5238     return !N0C->isNullValue() ? N1 : N2;
5239   }
5240   // fold (select C, 1, X) -> (or C, X)
5241   if (VT == MVT::i1 && isOneConstant(N1))
5242     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5243 
5244   if (SDValue V = foldSelectOfConstants(N))
5245     return V;
5246 
5247   // fold (select C, 0, X) -> (and (not C), X)
5248   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
5249     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5250     AddToWorklist(NOTNode.getNode());
5251     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
5252   }
5253   // fold (select C, X, 1) -> (or (not C), X)
5254   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
5255     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5256     AddToWorklist(NOTNode.getNode());
5257     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
5258   }
5259   // fold (select C, X, 0) -> (and C, X)
5260   if (VT == MVT::i1 && isNullConstant(N2))
5261     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
5262   // fold (select X, X, Y) -> (or X, Y)
5263   // fold (select X, 1, Y) -> (or X, Y)
5264   if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
5265     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5266   // fold (select X, Y, X) -> (and X, Y)
5267   // fold (select X, Y, 0) -> (and X, Y)
5268   if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
5269     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
5270 
5271   // If we can fold this based on the true/false value, do so.
5272   if (SimplifySelectOps(N, N1, N2))
5273     return SDValue(N, 0);  // Don't revisit N.
5274 
5275   if (VT0 == MVT::i1) {
5276     // The code in this block deals with the following 2 equivalences:
5277     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
5278     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
5279     // The target can specify its prefered form with the
5280     // shouldNormalizeToSelectSequence() callback. However we always transform
5281     // to the right anyway if we find the inner select exists in the DAG anyway
5282     // and we always transform to the left side if we know that we can further
5283     // optimize the combination of the conditions.
5284     bool normalizeToSequence
5285       = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
5286     // select (and Cond0, Cond1), X, Y
5287     //   -> select Cond0, (select Cond1, X, Y), Y
5288     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
5289       SDValue Cond0 = N0->getOperand(0);
5290       SDValue Cond1 = N0->getOperand(1);
5291       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5292                                         N1.getValueType(), Cond1, N1, N2);
5293       if (normalizeToSequence || !InnerSelect.use_empty())
5294         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
5295                            InnerSelect, N2);
5296     }
5297     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
5298     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
5299       SDValue Cond0 = N0->getOperand(0);
5300       SDValue Cond1 = N0->getOperand(1);
5301       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5302                                         N1.getValueType(), Cond1, N1, N2);
5303       if (normalizeToSequence || !InnerSelect.use_empty())
5304         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
5305                            InnerSelect);
5306     }
5307 
5308     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
5309     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
5310       SDValue N1_0 = N1->getOperand(0);
5311       SDValue N1_1 = N1->getOperand(1);
5312       SDValue N1_2 = N1->getOperand(2);
5313       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
5314         // Create the actual and node if we can generate good code for it.
5315         if (!normalizeToSequence) {
5316           SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
5317                                     N0, N1_0);
5318           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
5319                              N1_1, N2);
5320         }
5321         // Otherwise see if we can optimize the "and" to a better pattern.
5322         if (SDValue Combined = visitANDLike(N0, N1_0, N))
5323           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5324                              N1_1, N2);
5325       }
5326     }
5327     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
5328     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
5329       SDValue N2_0 = N2->getOperand(0);
5330       SDValue N2_1 = N2->getOperand(1);
5331       SDValue N2_2 = N2->getOperand(2);
5332       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
5333         // Create the actual or node if we can generate good code for it.
5334         if (!normalizeToSequence) {
5335           SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
5336                                    N0, N2_0);
5337           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
5338                              N1, N2_2);
5339         }
5340         // Otherwise see if we can optimize to a better pattern.
5341         if (SDValue Combined = visitORLike(N0, N2_0, N))
5342           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5343                              N1, N2_2);
5344       }
5345     }
5346   }
5347 
5348   // select (xor Cond, 1), X, Y -> select Cond, Y, X
5349   // select (xor Cond, 0), X, Y -> selext Cond, X, Y
5350   if (VT0 == MVT::i1) {
5351     if (N0->getOpcode() == ISD::XOR) {
5352       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
5353         SDValue Cond0 = N0->getOperand(0);
5354         if (C->isOne())
5355           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
5356                              Cond0, N2, N1);
5357         else
5358           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
5359                              Cond0, N1, N2);
5360       }
5361     }
5362   }
5363 
5364   // fold selects based on a setcc into other things, such as min/max/abs
5365   if (N0.getOpcode() == ISD::SETCC) {
5366     // select x, y (fcmp lt x, y) -> fminnum x, y
5367     // select x, y (fcmp gt x, y) -> fmaxnum x, y
5368     //
5369     // This is OK if we don't care about what happens if either operand is a
5370     // NaN.
5371     //
5372 
5373     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
5374     // no signed zeros as well as no nans.
5375     const TargetOptions &Options = DAG.getTarget().Options;
5376     if (Options.UnsafeFPMath &&
5377         VT.isFloatingPoint() && N0.hasOneUse() &&
5378         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
5379       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5380 
5381       if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
5382                                                 N0.getOperand(1), N1, N2, CC,
5383                                                 TLI, DAG))
5384         return FMinMax;
5385     }
5386 
5387     if ((!LegalOperations &&
5388          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
5389         TLI.isOperationLegal(ISD::SELECT_CC, VT))
5390       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
5391                          N0.getOperand(0), N0.getOperand(1),
5392                          N1, N2, N0.getOperand(2));
5393     return SimplifySelect(SDLoc(N), N0, N1, N2);
5394   }
5395 
5396   return SDValue();
5397 }
5398 
5399 static
5400 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
5401   SDLoc DL(N);
5402   EVT LoVT, HiVT;
5403   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
5404 
5405   // Split the inputs.
5406   SDValue Lo, Hi, LL, LH, RL, RH;
5407   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
5408   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
5409 
5410   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
5411   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
5412 
5413   return std::make_pair(Lo, Hi);
5414 }
5415 
5416 // This function assumes all the vselect's arguments are CONCAT_VECTOR
5417 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
5418 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
5419   SDLoc DL(N);
5420   SDValue Cond = N->getOperand(0);
5421   SDValue LHS = N->getOperand(1);
5422   SDValue RHS = N->getOperand(2);
5423   EVT VT = N->getValueType(0);
5424   int NumElems = VT.getVectorNumElements();
5425   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
5426          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
5427          Cond.getOpcode() == ISD::BUILD_VECTOR);
5428 
5429   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
5430   // binary ones here.
5431   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
5432     return SDValue();
5433 
5434   // We're sure we have an even number of elements due to the
5435   // concat_vectors we have as arguments to vselect.
5436   // Skip BV elements until we find one that's not an UNDEF
5437   // After we find an UNDEF element, keep looping until we get to half the
5438   // length of the BV and see if all the non-undef nodes are the same.
5439   ConstantSDNode *BottomHalf = nullptr;
5440   for (int i = 0; i < NumElems / 2; ++i) {
5441     if (Cond->getOperand(i)->isUndef())
5442       continue;
5443 
5444     if (BottomHalf == nullptr)
5445       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5446     else if (Cond->getOperand(i).getNode() != BottomHalf)
5447       return SDValue();
5448   }
5449 
5450   // Do the same for the second half of the BuildVector
5451   ConstantSDNode *TopHalf = nullptr;
5452   for (int i = NumElems / 2; i < NumElems; ++i) {
5453     if (Cond->getOperand(i)->isUndef())
5454       continue;
5455 
5456     if (TopHalf == nullptr)
5457       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5458     else if (Cond->getOperand(i).getNode() != TopHalf)
5459       return SDValue();
5460   }
5461 
5462   assert(TopHalf && BottomHalf &&
5463          "One half of the selector was all UNDEFs and the other was all the "
5464          "same value. This should have been addressed before this function.");
5465   return DAG.getNode(
5466       ISD::CONCAT_VECTORS, DL, VT,
5467       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
5468       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
5469 }
5470 
5471 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
5472 
5473   if (Level >= AfterLegalizeTypes)
5474     return SDValue();
5475 
5476   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
5477   SDValue Mask = MSC->getMask();
5478   SDValue Data  = MSC->getValue();
5479   SDLoc DL(N);
5480 
5481   // If the MSCATTER data type requires splitting and the mask is provided by a
5482   // SETCC, then split both nodes and its operands before legalization. This
5483   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5484   // and enables future optimizations (e.g. min/max pattern matching on X86).
5485   if (Mask.getOpcode() != ISD::SETCC)
5486     return SDValue();
5487 
5488   // Check if any splitting is required.
5489   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5490       TargetLowering::TypeSplitVector)
5491     return SDValue();
5492   SDValue MaskLo, MaskHi, Lo, Hi;
5493   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5494 
5495   EVT LoVT, HiVT;
5496   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
5497 
5498   SDValue Chain = MSC->getChain();
5499 
5500   EVT MemoryVT = MSC->getMemoryVT();
5501   unsigned Alignment = MSC->getOriginalAlignment();
5502 
5503   EVT LoMemVT, HiMemVT;
5504   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5505 
5506   SDValue DataLo, DataHi;
5507   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5508 
5509   SDValue BasePtr = MSC->getBasePtr();
5510   SDValue IndexLo, IndexHi;
5511   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
5512 
5513   MachineMemOperand *MMO = DAG.getMachineFunction().
5514     getMachineMemOperand(MSC->getPointerInfo(),
5515                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5516                           Alignment, MSC->getAAInfo(), MSC->getRanges());
5517 
5518   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
5519   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
5520                             DL, OpsLo, MMO);
5521 
5522   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
5523   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
5524                             DL, OpsHi, MMO);
5525 
5526   AddToWorklist(Lo.getNode());
5527   AddToWorklist(Hi.getNode());
5528 
5529   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5530 }
5531 
5532 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
5533 
5534   if (Level >= AfterLegalizeTypes)
5535     return SDValue();
5536 
5537   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
5538   SDValue Mask = MST->getMask();
5539   SDValue Data  = MST->getValue();
5540   SDLoc DL(N);
5541 
5542   // If the MSTORE data type requires splitting and the mask is provided by a
5543   // SETCC, then split both nodes and its operands before legalization. This
5544   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5545   // and enables future optimizations (e.g. min/max pattern matching on X86).
5546   if (Mask.getOpcode() == ISD::SETCC) {
5547 
5548     // Check if any splitting is required.
5549     if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5550         TargetLowering::TypeSplitVector)
5551       return SDValue();
5552 
5553     SDValue MaskLo, MaskHi, Lo, Hi;
5554     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5555 
5556     EVT LoVT, HiVT;
5557     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
5558 
5559     SDValue Chain = MST->getChain();
5560     SDValue Ptr   = MST->getBasePtr();
5561 
5562     EVT MemoryVT = MST->getMemoryVT();
5563     unsigned Alignment = MST->getOriginalAlignment();
5564 
5565     // if Alignment is equal to the vector size,
5566     // take the half of it for the second part
5567     unsigned SecondHalfAlignment =
5568       (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
5569          Alignment/2 : Alignment;
5570 
5571     EVT LoMemVT, HiMemVT;
5572     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5573 
5574     SDValue DataLo, DataHi;
5575     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5576 
5577     MachineMemOperand *MMO = DAG.getMachineFunction().
5578       getMachineMemOperand(MST->getPointerInfo(),
5579                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5580                            Alignment, MST->getAAInfo(), MST->getRanges());
5581 
5582     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
5583                             MST->isTruncatingStore());
5584 
5585     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5586     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5587                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5588 
5589     MMO = DAG.getMachineFunction().
5590       getMachineMemOperand(MST->getPointerInfo(),
5591                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
5592                            SecondHalfAlignment, MST->getAAInfo(),
5593                            MST->getRanges());
5594 
5595     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
5596                             MST->isTruncatingStore());
5597 
5598     AddToWorklist(Lo.getNode());
5599     AddToWorklist(Hi.getNode());
5600 
5601     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5602   }
5603   return SDValue();
5604 }
5605 
5606 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
5607 
5608   if (Level >= AfterLegalizeTypes)
5609     return SDValue();
5610 
5611   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
5612   SDValue Mask = MGT->getMask();
5613   SDLoc DL(N);
5614 
5615   // If the MGATHER result requires splitting and the mask is provided by a
5616   // SETCC, then split both nodes and its operands before legalization. This
5617   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5618   // and enables future optimizations (e.g. min/max pattern matching on X86).
5619 
5620   if (Mask.getOpcode() != ISD::SETCC)
5621     return SDValue();
5622 
5623   EVT VT = N->getValueType(0);
5624 
5625   // Check if any splitting is required.
5626   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5627       TargetLowering::TypeSplitVector)
5628     return SDValue();
5629 
5630   SDValue MaskLo, MaskHi, Lo, Hi;
5631   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5632 
5633   SDValue Src0 = MGT->getValue();
5634   SDValue Src0Lo, Src0Hi;
5635   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5636 
5637   EVT LoVT, HiVT;
5638   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5639 
5640   SDValue Chain = MGT->getChain();
5641   EVT MemoryVT = MGT->getMemoryVT();
5642   unsigned Alignment = MGT->getOriginalAlignment();
5643 
5644   EVT LoMemVT, HiMemVT;
5645   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5646 
5647   SDValue BasePtr = MGT->getBasePtr();
5648   SDValue Index = MGT->getIndex();
5649   SDValue IndexLo, IndexHi;
5650   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
5651 
5652   MachineMemOperand *MMO = DAG.getMachineFunction().
5653     getMachineMemOperand(MGT->getPointerInfo(),
5654                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5655                           Alignment, MGT->getAAInfo(), MGT->getRanges());
5656 
5657   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
5658   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
5659                             MMO);
5660 
5661   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
5662   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
5663                             MMO);
5664 
5665   AddToWorklist(Lo.getNode());
5666   AddToWorklist(Hi.getNode());
5667 
5668   // Build a factor node to remember that this load is independent of the
5669   // other one.
5670   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5671                       Hi.getValue(1));
5672 
5673   // Legalized the chain result - switch anything that used the old chain to
5674   // use the new one.
5675   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
5676 
5677   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5678 
5679   SDValue RetOps[] = { GatherRes, Chain };
5680   return DAG.getMergeValues(RetOps, DL);
5681 }
5682 
5683 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
5684 
5685   if (Level >= AfterLegalizeTypes)
5686     return SDValue();
5687 
5688   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
5689   SDValue Mask = MLD->getMask();
5690   SDLoc DL(N);
5691 
5692   // If the MLOAD result requires splitting and the mask is provided by a
5693   // SETCC, then split both nodes and its operands before legalization. This
5694   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5695   // and enables future optimizations (e.g. min/max pattern matching on X86).
5696 
5697   if (Mask.getOpcode() == ISD::SETCC) {
5698     EVT VT = N->getValueType(0);
5699 
5700     // Check if any splitting is required.
5701     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5702         TargetLowering::TypeSplitVector)
5703       return SDValue();
5704 
5705     SDValue MaskLo, MaskHi, Lo, Hi;
5706     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5707 
5708     SDValue Src0 = MLD->getSrc0();
5709     SDValue Src0Lo, Src0Hi;
5710     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5711 
5712     EVT LoVT, HiVT;
5713     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
5714 
5715     SDValue Chain = MLD->getChain();
5716     SDValue Ptr   = MLD->getBasePtr();
5717     EVT MemoryVT = MLD->getMemoryVT();
5718     unsigned Alignment = MLD->getOriginalAlignment();
5719 
5720     // if Alignment is equal to the vector size,
5721     // take the half of it for the second part
5722     unsigned SecondHalfAlignment =
5723       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
5724          Alignment/2 : Alignment;
5725 
5726     EVT LoMemVT, HiMemVT;
5727     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5728 
5729     MachineMemOperand *MMO = DAG.getMachineFunction().
5730     getMachineMemOperand(MLD->getPointerInfo(),
5731                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5732                          Alignment, MLD->getAAInfo(), MLD->getRanges());
5733 
5734     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
5735                            ISD::NON_EXTLOAD);
5736 
5737     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5738     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5739                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5740 
5741     MMO = DAG.getMachineFunction().
5742     getMachineMemOperand(MLD->getPointerInfo(),
5743                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
5744                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
5745 
5746     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
5747                            ISD::NON_EXTLOAD);
5748 
5749     AddToWorklist(Lo.getNode());
5750     AddToWorklist(Hi.getNode());
5751 
5752     // Build a factor node to remember that this load is independent of the
5753     // other one.
5754     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5755                         Hi.getValue(1));
5756 
5757     // Legalized the chain result - switch anything that used the old chain to
5758     // use the new one.
5759     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
5760 
5761     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5762 
5763     SDValue RetOps[] = { LoadRes, Chain };
5764     return DAG.getMergeValues(RetOps, DL);
5765   }
5766   return SDValue();
5767 }
5768 
5769 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
5770   SDValue N0 = N->getOperand(0);
5771   SDValue N1 = N->getOperand(1);
5772   SDValue N2 = N->getOperand(2);
5773   SDLoc DL(N);
5774 
5775   // Canonicalize integer abs.
5776   // vselect (setg[te] X,  0),  X, -X ->
5777   // vselect (setgt    X, -1),  X, -X ->
5778   // vselect (setl[te] X,  0), -X,  X ->
5779   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
5780   if (N0.getOpcode() == ISD::SETCC) {
5781     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5782     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5783     bool isAbs = false;
5784     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
5785 
5786     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
5787          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
5788         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
5789       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
5790     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
5791              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
5792       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5793 
5794     if (isAbs) {
5795       EVT VT = LHS.getValueType();
5796       SDValue Shift = DAG.getNode(
5797           ISD::SRA, DL, VT, LHS,
5798           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
5799       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
5800       AddToWorklist(Shift.getNode());
5801       AddToWorklist(Add.getNode());
5802       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
5803     }
5804   }
5805 
5806   if (SimplifySelectOps(N, N1, N2))
5807     return SDValue(N, 0);  // Don't revisit N.
5808 
5809   // If the VSELECT result requires splitting and the mask is provided by a
5810   // SETCC, then split both nodes and its operands before legalization. This
5811   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5812   // and enables future optimizations (e.g. min/max pattern matching on X86).
5813   if (N0.getOpcode() == ISD::SETCC) {
5814     EVT VT = N->getValueType(0);
5815 
5816     // Check if any splitting is required.
5817     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5818         TargetLowering::TypeSplitVector)
5819       return SDValue();
5820 
5821     SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
5822     std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
5823     std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
5824     std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
5825 
5826     Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
5827     Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
5828 
5829     // Add the new VSELECT nodes to the work list in case they need to be split
5830     // again.
5831     AddToWorklist(Lo.getNode());
5832     AddToWorklist(Hi.getNode());
5833 
5834     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5835   }
5836 
5837   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
5838   if (ISD::isBuildVectorAllOnes(N0.getNode()))
5839     return N1;
5840   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
5841   if (ISD::isBuildVectorAllZeros(N0.getNode()))
5842     return N2;
5843 
5844   // The ConvertSelectToConcatVector function is assuming both the above
5845   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
5846   // and addressed.
5847   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
5848       N2.getOpcode() == ISD::CONCAT_VECTORS &&
5849       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
5850     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
5851       return CV;
5852   }
5853 
5854   return SDValue();
5855 }
5856 
5857 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
5858   SDValue N0 = N->getOperand(0);
5859   SDValue N1 = N->getOperand(1);
5860   SDValue N2 = N->getOperand(2);
5861   SDValue N3 = N->getOperand(3);
5862   SDValue N4 = N->getOperand(4);
5863   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
5864 
5865   // fold select_cc lhs, rhs, x, x, cc -> x
5866   if (N2 == N3)
5867     return N2;
5868 
5869   // Determine if the condition we're dealing with is constant
5870   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
5871                                   CC, SDLoc(N), false)) {
5872     AddToWorklist(SCC.getNode());
5873 
5874     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
5875       if (!SCCC->isNullValue())
5876         return N2;    // cond always true -> true val
5877       else
5878         return N3;    // cond always false -> false val
5879     } else if (SCC->isUndef()) {
5880       // When the condition is UNDEF, just return the first operand. This is
5881       // coherent the DAG creation, no setcc node is created in this case
5882       return N2;
5883     } else if (SCC.getOpcode() == ISD::SETCC) {
5884       // Fold to a simpler select_cc
5885       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
5886                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
5887                          SCC.getOperand(2));
5888     }
5889   }
5890 
5891   // If we can fold this based on the true/false value, do so.
5892   if (SimplifySelectOps(N, N2, N3))
5893     return SDValue(N, 0);  // Don't revisit N.
5894 
5895   // fold select_cc into other things, such as min/max/abs
5896   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
5897 }
5898 
5899 SDValue DAGCombiner::visitSETCC(SDNode *N) {
5900   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
5901                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
5902                        SDLoc(N));
5903 }
5904 
5905 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
5906   SDValue LHS = N->getOperand(0);
5907   SDValue RHS = N->getOperand(1);
5908   SDValue Carry = N->getOperand(2);
5909   SDValue Cond = N->getOperand(3);
5910 
5911   // If Carry is false, fold to a regular SETCC.
5912   if (Carry.getOpcode() == ISD::CARRY_FALSE)
5913     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
5914 
5915   return SDValue();
5916 }
5917 
5918 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
5919 /// a build_vector of constants.
5920 /// This function is called by the DAGCombiner when visiting sext/zext/aext
5921 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
5922 /// Vector extends are not folded if operations are legal; this is to
5923 /// avoid introducing illegal build_vector dag nodes.
5924 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
5925                                          SelectionDAG &DAG, bool LegalTypes,
5926                                          bool LegalOperations) {
5927   unsigned Opcode = N->getOpcode();
5928   SDValue N0 = N->getOperand(0);
5929   EVT VT = N->getValueType(0);
5930 
5931   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
5932          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5933          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
5934          && "Expected EXTEND dag node in input!");
5935 
5936   // fold (sext c1) -> c1
5937   // fold (zext c1) -> c1
5938   // fold (aext c1) -> c1
5939   if (isa<ConstantSDNode>(N0))
5940     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
5941 
5942   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
5943   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
5944   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
5945   EVT SVT = VT.getScalarType();
5946   if (!(VT.isVector() &&
5947       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
5948       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
5949     return nullptr;
5950 
5951   // We can fold this node into a build_vector.
5952   unsigned VTBits = SVT.getSizeInBits();
5953   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
5954   SmallVector<SDValue, 8> Elts;
5955   unsigned NumElts = VT.getVectorNumElements();
5956   SDLoc DL(N);
5957 
5958   for (unsigned i=0; i != NumElts; ++i) {
5959     SDValue Op = N0->getOperand(i);
5960     if (Op->isUndef()) {
5961       Elts.push_back(DAG.getUNDEF(SVT));
5962       continue;
5963     }
5964 
5965     SDLoc DL(Op);
5966     // Get the constant value and if needed trunc it to the size of the type.
5967     // Nodes like build_vector might have constants wider than the scalar type.
5968     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
5969     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
5970       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
5971     else
5972       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
5973   }
5974 
5975   return DAG.getBuildVector(VT, DL, Elts).getNode();
5976 }
5977 
5978 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
5979 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
5980 // transformation. Returns true if extension are possible and the above
5981 // mentioned transformation is profitable.
5982 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
5983                                     unsigned ExtOpc,
5984                                     SmallVectorImpl<SDNode *> &ExtendNodes,
5985                                     const TargetLowering &TLI) {
5986   bool HasCopyToRegUses = false;
5987   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
5988   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
5989                             UE = N0.getNode()->use_end();
5990        UI != UE; ++UI) {
5991     SDNode *User = *UI;
5992     if (User == N)
5993       continue;
5994     if (UI.getUse().getResNo() != N0.getResNo())
5995       continue;
5996     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
5997     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
5998       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
5999       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6000         // Sign bits will be lost after a zext.
6001         return false;
6002       bool Add = false;
6003       for (unsigned i = 0; i != 2; ++i) {
6004         SDValue UseOp = User->getOperand(i);
6005         if (UseOp == N0)
6006           continue;
6007         if (!isa<ConstantSDNode>(UseOp))
6008           return false;
6009         Add = true;
6010       }
6011       if (Add)
6012         ExtendNodes.push_back(User);
6013       continue;
6014     }
6015     // If truncates aren't free and there are users we can't
6016     // extend, it isn't worthwhile.
6017     if (!isTruncFree)
6018       return false;
6019     // Remember if this value is live-out.
6020     if (User->getOpcode() == ISD::CopyToReg)
6021       HasCopyToRegUses = true;
6022   }
6023 
6024   if (HasCopyToRegUses) {
6025     bool BothLiveOut = false;
6026     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6027          UI != UE; ++UI) {
6028       SDUse &Use = UI.getUse();
6029       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6030         BothLiveOut = true;
6031         break;
6032       }
6033     }
6034     if (BothLiveOut)
6035       // Both unextended and extended values are live out. There had better be
6036       // a good reason for the transformation.
6037       return ExtendNodes.size();
6038   }
6039   return true;
6040 }
6041 
6042 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6043                                   SDValue Trunc, SDValue ExtLoad,
6044                                   const SDLoc &DL, ISD::NodeType ExtType) {
6045   // Extend SetCC uses if necessary.
6046   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6047     SDNode *SetCC = SetCCs[i];
6048     SmallVector<SDValue, 4> Ops;
6049 
6050     for (unsigned j = 0; j != 2; ++j) {
6051       SDValue SOp = SetCC->getOperand(j);
6052       if (SOp == Trunc)
6053         Ops.push_back(ExtLoad);
6054       else
6055         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6056     }
6057 
6058     Ops.push_back(SetCC->getOperand(2));
6059     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6060   }
6061 }
6062 
6063 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
6064 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
6065   SDValue N0 = N->getOperand(0);
6066   EVT DstVT = N->getValueType(0);
6067   EVT SrcVT = N0.getValueType();
6068 
6069   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
6070           N->getOpcode() == ISD::ZERO_EXTEND) &&
6071          "Unexpected node type (not an extend)!");
6072 
6073   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
6074   // For example, on a target with legal v4i32, but illegal v8i32, turn:
6075   //   (v8i32 (sext (v8i16 (load x))))
6076   // into:
6077   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
6078   //                          (v4i32 (sextload (x + 16)))))
6079   // Where uses of the original load, i.e.:
6080   //   (v8i16 (load x))
6081   // are replaced with:
6082   //   (v8i16 (truncate
6083   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
6084   //                            (v4i32 (sextload (x + 16)))))))
6085   //
6086   // This combine is only applicable to illegal, but splittable, vectors.
6087   // All legal types, and illegal non-vector types, are handled elsewhere.
6088   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
6089   //
6090   if (N0->getOpcode() != ISD::LOAD)
6091     return SDValue();
6092 
6093   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6094 
6095   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
6096       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
6097       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
6098     return SDValue();
6099 
6100   SmallVector<SDNode *, 4> SetCCs;
6101   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
6102     return SDValue();
6103 
6104   ISD::LoadExtType ExtType =
6105       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
6106 
6107   // Try to split the vector types to get down to legal types.
6108   EVT SplitSrcVT = SrcVT;
6109   EVT SplitDstVT = DstVT;
6110   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
6111          SplitSrcVT.getVectorNumElements() > 1) {
6112     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
6113     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
6114   }
6115 
6116   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
6117     return SDValue();
6118 
6119   SDLoc DL(N);
6120   const unsigned NumSplits =
6121       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
6122   const unsigned Stride = SplitSrcVT.getStoreSize();
6123   SmallVector<SDValue, 4> Loads;
6124   SmallVector<SDValue, 4> Chains;
6125 
6126   SDValue BasePtr = LN0->getBasePtr();
6127   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
6128     const unsigned Offset = Idx * Stride;
6129     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
6130 
6131     SDValue SplitLoad = DAG.getExtLoad(
6132         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
6133         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
6134         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
6135 
6136     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
6137                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
6138 
6139     Loads.push_back(SplitLoad.getValue(0));
6140     Chains.push_back(SplitLoad.getValue(1));
6141   }
6142 
6143   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
6144   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
6145 
6146   CombineTo(N, NewValue);
6147 
6148   // Replace uses of the original load (before extension)
6149   // with a truncate of the concatenated sextloaded vectors.
6150   SDValue Trunc =
6151       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
6152   CombineTo(N0.getNode(), Trunc, NewChain);
6153   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
6154                   (ISD::NodeType)N->getOpcode());
6155   return SDValue(N, 0); // Return N so it doesn't get rechecked!
6156 }
6157 
6158 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
6159   SDValue N0 = N->getOperand(0);
6160   EVT VT = N->getValueType(0);
6161 
6162   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6163                                               LegalOperations))
6164     return SDValue(Res, 0);
6165 
6166   // fold (sext (sext x)) -> (sext x)
6167   // fold (sext (aext x)) -> (sext x)
6168   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6169     return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
6170                        N0.getOperand(0));
6171 
6172   if (N0.getOpcode() == ISD::TRUNCATE) {
6173     // fold (sext (truncate (load x))) -> (sext (smaller load x))
6174     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
6175     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6176       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6177       if (NarrowLoad.getNode() != N0.getNode()) {
6178         CombineTo(N0.getNode(), NarrowLoad);
6179         // CombineTo deleted the truncate, if needed, but not what's under it.
6180         AddToWorklist(oye);
6181       }
6182       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6183     }
6184 
6185     // See if the value being truncated is already sign extended.  If so, just
6186     // eliminate the trunc/sext pair.
6187     SDValue Op = N0.getOperand(0);
6188     unsigned OpBits   = Op.getScalarValueSizeInBits();
6189     unsigned MidBits  = N0.getScalarValueSizeInBits();
6190     unsigned DestBits = VT.getScalarSizeInBits();
6191     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
6192 
6193     if (OpBits == DestBits) {
6194       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
6195       // bits, it is already ready.
6196       if (NumSignBits > DestBits-MidBits)
6197         return Op;
6198     } else if (OpBits < DestBits) {
6199       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
6200       // bits, just sext from i32.
6201       if (NumSignBits > OpBits-MidBits)
6202         return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
6203     } else {
6204       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
6205       // bits, just truncate to i32.
6206       if (NumSignBits > OpBits-MidBits)
6207         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6208     }
6209 
6210     // fold (sext (truncate x)) -> (sextinreg x).
6211     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
6212                                                  N0.getValueType())) {
6213       if (OpBits < DestBits)
6214         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
6215       else if (OpBits > DestBits)
6216         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
6217       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
6218                          DAG.getValueType(N0.getValueType()));
6219     }
6220   }
6221 
6222   // fold (sext (load x)) -> (sext (truncate (sextload x)))
6223   // Only generate vector extloads when 1) they're legal, and 2) they are
6224   // deemed desirable by the target.
6225   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6226       ((!LegalOperations && !VT.isVector() &&
6227         !cast<LoadSDNode>(N0)->isVolatile()) ||
6228        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
6229     bool DoXform = true;
6230     SmallVector<SDNode*, 4> SetCCs;
6231     if (!N0.hasOneUse())
6232       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
6233     if (VT.isVector())
6234       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6235     if (DoXform) {
6236       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6237       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6238                                        LN0->getChain(),
6239                                        LN0->getBasePtr(), N0.getValueType(),
6240                                        LN0->getMemOperand());
6241       CombineTo(N, ExtLoad);
6242       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6243                                   N0.getValueType(), ExtLoad);
6244       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6245       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6246                       ISD::SIGN_EXTEND);
6247       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6248     }
6249   }
6250 
6251   // fold (sext (load x)) to multiple smaller sextloads.
6252   // Only on illegal but splittable vectors.
6253   if (SDValue ExtLoad = CombineExtLoad(N))
6254     return ExtLoad;
6255 
6256   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
6257   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
6258   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6259       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6260     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6261     EVT MemVT = LN0->getMemoryVT();
6262     if ((!LegalOperations && !LN0->isVolatile()) ||
6263         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
6264       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6265                                        LN0->getChain(),
6266                                        LN0->getBasePtr(), MemVT,
6267                                        LN0->getMemOperand());
6268       CombineTo(N, ExtLoad);
6269       CombineTo(N0.getNode(),
6270                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6271                             N0.getValueType(), ExtLoad),
6272                 ExtLoad.getValue(1));
6273       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6274     }
6275   }
6276 
6277   // fold (sext (and/or/xor (load x), cst)) ->
6278   //      (and/or/xor (sextload x), (sext cst))
6279   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6280        N0.getOpcode() == ISD::XOR) &&
6281       isa<LoadSDNode>(N0.getOperand(0)) &&
6282       N0.getOperand(1).getOpcode() == ISD::Constant &&
6283       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
6284       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6285     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6286     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
6287       bool DoXform = true;
6288       SmallVector<SDNode*, 4> SetCCs;
6289       if (!N0.hasOneUse())
6290         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
6291                                           SetCCs, TLI);
6292       if (DoXform) {
6293         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
6294                                          LN0->getChain(), LN0->getBasePtr(),
6295                                          LN0->getMemoryVT(),
6296                                          LN0->getMemOperand());
6297         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6298         Mask = Mask.sext(VT.getSizeInBits());
6299         SDLoc DL(N);
6300         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6301                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
6302         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6303                                     SDLoc(N0.getOperand(0)),
6304                                     N0.getOperand(0).getValueType(), ExtLoad);
6305         CombineTo(N, And);
6306         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6307         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6308                         ISD::SIGN_EXTEND);
6309         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6310       }
6311     }
6312   }
6313 
6314   if (N0.getOpcode() == ISD::SETCC) {
6315     EVT N0VT = N0.getOperand(0).getValueType();
6316     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
6317     // Only do this before legalize for now.
6318     if (VT.isVector() && !LegalOperations &&
6319         TLI.getBooleanContents(N0VT) ==
6320             TargetLowering::ZeroOrNegativeOneBooleanContent) {
6321       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
6322       // of the same size as the compared operands. Only optimize sext(setcc())
6323       // if this is the case.
6324       EVT SVT = getSetCCResultType(N0VT);
6325 
6326       // We know that the # elements of the results is the same as the
6327       // # elements of the compare (and the # elements of the compare result
6328       // for that matter).  Check to see that they are the same size.  If so,
6329       // we know that the element size of the sext'd result matches the
6330       // element size of the compare operands.
6331       if (VT.getSizeInBits() == SVT.getSizeInBits())
6332         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6333                              N0.getOperand(1),
6334                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
6335 
6336       // If the desired elements are smaller or larger than the source
6337       // elements we can use a matching integer vector type and then
6338       // truncate/sign extend
6339       EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6340       if (SVT == MatchingVectorType) {
6341         SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
6342                                N0.getOperand(0), N0.getOperand(1),
6343                                cast<CondCodeSDNode>(N0.getOperand(2))->get());
6344         return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
6345       }
6346     }
6347 
6348     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
6349     // Here, T can be 1 or -1, depending on the type of the setcc and
6350     // getBooleanContents().
6351     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
6352 
6353     SDLoc DL(N);
6354     // To determine the "true" side of the select, we need to know the high bit
6355     // of the value returned by the setcc if it evaluates to true.
6356     // If the type of the setcc is i1, then the true case of the select is just
6357     // sext(i1 1), that is, -1.
6358     // If the type of the setcc is larger (say, i8) then the value of the high
6359     // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
6360     // of the appropriate width.
6361     SDValue ExtTrueVal =
6362         (SetCCWidth == 1)
6363             ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
6364                               DL, VT)
6365             : TLI.getConstTrueVal(DAG, VT, DL);
6366 
6367     if (SDValue SCC = SimplifySelectCC(
6368             DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
6369             DAG.getConstant(0, DL, VT),
6370             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6371       return SCC;
6372 
6373     if (!VT.isVector()) {
6374       EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
6375       if (!LegalOperations ||
6376           TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
6377         SDLoc DL(N);
6378         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6379         SDValue SetCC =
6380             DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
6381         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
6382                              DAG.getConstant(0, DL, VT));
6383       }
6384     }
6385   }
6386 
6387   // fold (sext x) -> (zext x) if the sign bit is known zero.
6388   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
6389       DAG.SignBitIsZero(N0))
6390     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
6391 
6392   return SDValue();
6393 }
6394 
6395 // isTruncateOf - If N is a truncate of some other value, return true, record
6396 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
6397 // This function computes KnownZero to avoid a duplicated call to
6398 // computeKnownBits in the caller.
6399 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
6400                          APInt &KnownZero) {
6401   APInt KnownOne;
6402   if (N->getOpcode() == ISD::TRUNCATE) {
6403     Op = N->getOperand(0);
6404     DAG.computeKnownBits(Op, KnownZero, KnownOne);
6405     return true;
6406   }
6407 
6408   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
6409       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
6410     return false;
6411 
6412   SDValue Op0 = N->getOperand(0);
6413   SDValue Op1 = N->getOperand(1);
6414   assert(Op0.getValueType() == Op1.getValueType());
6415 
6416   if (isNullConstant(Op0))
6417     Op = Op1;
6418   else if (isNullConstant(Op1))
6419     Op = Op0;
6420   else
6421     return false;
6422 
6423   DAG.computeKnownBits(Op, KnownZero, KnownOne);
6424 
6425   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
6426     return false;
6427 
6428   return true;
6429 }
6430 
6431 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
6432   SDValue N0 = N->getOperand(0);
6433   EVT VT = N->getValueType(0);
6434 
6435   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6436                                               LegalOperations))
6437     return SDValue(Res, 0);
6438 
6439   // fold (zext (zext x)) -> (zext x)
6440   // fold (zext (aext x)) -> (zext x)
6441   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6442     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
6443                        N0.getOperand(0));
6444 
6445   // fold (zext (truncate x)) -> (zext x) or
6446   //      (zext (truncate x)) -> (truncate x)
6447   // This is valid when the truncated bits of x are already zero.
6448   // FIXME: We should extend this to work for vectors too.
6449   SDValue Op;
6450   APInt KnownZero;
6451   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
6452     APInt TruncatedBits =
6453       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
6454       APInt(Op.getValueSizeInBits(), 0) :
6455       APInt::getBitsSet(Op.getValueSizeInBits(),
6456                         N0.getValueSizeInBits(),
6457                         std::min(Op.getValueSizeInBits(),
6458                                  VT.getSizeInBits()));
6459     if (TruncatedBits == (KnownZero & TruncatedBits)) {
6460       if (VT.bitsGT(Op.getValueType()))
6461         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
6462       if (VT.bitsLT(Op.getValueType()))
6463         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6464 
6465       return Op;
6466     }
6467   }
6468 
6469   // fold (zext (truncate (load x))) -> (zext (smaller load x))
6470   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
6471   if (N0.getOpcode() == ISD::TRUNCATE) {
6472     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6473       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6474       if (NarrowLoad.getNode() != N0.getNode()) {
6475         CombineTo(N0.getNode(), NarrowLoad);
6476         // CombineTo deleted the truncate, if needed, but not what's under it.
6477         AddToWorklist(oye);
6478       }
6479       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6480     }
6481   }
6482 
6483   // fold (zext (truncate x)) -> (and x, mask)
6484   if (N0.getOpcode() == ISD::TRUNCATE) {
6485     // fold (zext (truncate (load x))) -> (zext (smaller load x))
6486     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
6487     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6488       SDNode *oye = N0.getNode()->getOperand(0).getNode();
6489       if (NarrowLoad.getNode() != N0.getNode()) {
6490         CombineTo(N0.getNode(), NarrowLoad);
6491         // CombineTo deleted the truncate, if needed, but not what's under it.
6492         AddToWorklist(oye);
6493       }
6494       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6495     }
6496 
6497     EVT SrcVT = N0.getOperand(0).getValueType();
6498     EVT MinVT = N0.getValueType();
6499 
6500     // Try to mask before the extension to avoid having to generate a larger mask,
6501     // possibly over several sub-vectors.
6502     if (SrcVT.bitsLT(VT)) {
6503       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
6504                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
6505         SDValue Op = N0.getOperand(0);
6506         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6507         AddToWorklist(Op.getNode());
6508         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
6509       }
6510     }
6511 
6512     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
6513       SDValue Op = N0.getOperand(0);
6514       if (SrcVT.bitsLT(VT)) {
6515         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
6516         AddToWorklist(Op.getNode());
6517       } else if (SrcVT.bitsGT(VT)) {
6518         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6519         AddToWorklist(Op.getNode());
6520       }
6521       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6522     }
6523   }
6524 
6525   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
6526   // if either of the casts is not free.
6527   if (N0.getOpcode() == ISD::AND &&
6528       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6529       N0.getOperand(1).getOpcode() == ISD::Constant &&
6530       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6531                            N0.getValueType()) ||
6532        !TLI.isZExtFree(N0.getValueType(), VT))) {
6533     SDValue X = N0.getOperand(0).getOperand(0);
6534     if (X.getValueType().bitsLT(VT)) {
6535       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
6536     } else if (X.getValueType().bitsGT(VT)) {
6537       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6538     }
6539     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6540     Mask = Mask.zext(VT.getSizeInBits());
6541     SDLoc DL(N);
6542     return DAG.getNode(ISD::AND, DL, VT,
6543                        X, DAG.getConstant(Mask, DL, VT));
6544   }
6545 
6546   // fold (zext (load x)) -> (zext (truncate (zextload x)))
6547   // Only generate vector extloads when 1) they're legal, and 2) they are
6548   // deemed desirable by the target.
6549   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6550       ((!LegalOperations && !VT.isVector() &&
6551         !cast<LoadSDNode>(N0)->isVolatile()) ||
6552        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
6553     bool DoXform = true;
6554     SmallVector<SDNode*, 4> SetCCs;
6555     if (!N0.hasOneUse())
6556       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
6557     if (VT.isVector())
6558       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6559     if (DoXform) {
6560       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6561       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6562                                        LN0->getChain(),
6563                                        LN0->getBasePtr(), N0.getValueType(),
6564                                        LN0->getMemOperand());
6565       CombineTo(N, ExtLoad);
6566       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6567                                   N0.getValueType(), ExtLoad);
6568       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6569 
6570       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6571                       ISD::ZERO_EXTEND);
6572       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6573     }
6574   }
6575 
6576   // fold (zext (load x)) to multiple smaller zextloads.
6577   // Only on illegal but splittable vectors.
6578   if (SDValue ExtLoad = CombineExtLoad(N))
6579     return ExtLoad;
6580 
6581   // fold (zext (and/or/xor (load x), cst)) ->
6582   //      (and/or/xor (zextload x), (zext cst))
6583   // Unless (and (load x) cst) will match as a zextload already and has
6584   // additional users.
6585   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6586        N0.getOpcode() == ISD::XOR) &&
6587       isa<LoadSDNode>(N0.getOperand(0)) &&
6588       N0.getOperand(1).getOpcode() == ISD::Constant &&
6589       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
6590       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6591     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6592     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
6593       bool DoXform = true;
6594       SmallVector<SDNode*, 4> SetCCs;
6595       if (!N0.hasOneUse()) {
6596         if (N0.getOpcode() == ISD::AND) {
6597           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
6598           auto NarrowLoad = false;
6599           EVT LoadResultTy = AndC->getValueType(0);
6600           EVT ExtVT, LoadedVT;
6601           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
6602                                NarrowLoad))
6603             DoXform = false;
6604         }
6605         if (DoXform)
6606           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
6607                                             ISD::ZERO_EXTEND, SetCCs, TLI);
6608       }
6609       if (DoXform) {
6610         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
6611                                          LN0->getChain(), LN0->getBasePtr(),
6612                                          LN0->getMemoryVT(),
6613                                          LN0->getMemOperand());
6614         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6615         Mask = Mask.zext(VT.getSizeInBits());
6616         SDLoc DL(N);
6617         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6618                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
6619         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6620                                     SDLoc(N0.getOperand(0)),
6621                                     N0.getOperand(0).getValueType(), ExtLoad);
6622         CombineTo(N, And);
6623         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6624         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6625                         ISD::ZERO_EXTEND);
6626         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6627       }
6628     }
6629   }
6630 
6631   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
6632   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
6633   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6634       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6635     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6636     EVT MemVT = LN0->getMemoryVT();
6637     if ((!LegalOperations && !LN0->isVolatile()) ||
6638         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
6639       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6640                                        LN0->getChain(),
6641                                        LN0->getBasePtr(), MemVT,
6642                                        LN0->getMemOperand());
6643       CombineTo(N, ExtLoad);
6644       CombineTo(N0.getNode(),
6645                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
6646                             ExtLoad),
6647                 ExtLoad.getValue(1));
6648       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6649     }
6650   }
6651 
6652   if (N0.getOpcode() == ISD::SETCC) {
6653     // Only do this before legalize for now.
6654     if (!LegalOperations && VT.isVector() &&
6655         N0.getValueType().getVectorElementType() == MVT::i1) {
6656       EVT N00VT = N0.getOperand(0).getValueType();
6657       if (getSetCCResultType(N00VT) == N0.getValueType())
6658         return SDValue();
6659 
6660       // We know that the # elements of the results is the same as the #
6661       // elements of the compare (and the # elements of the compare result for
6662       // that matter). Check to see that they are the same size. If so, we know
6663       // that the element size of the sext'd result matches the element size of
6664       // the compare operands.
6665       SDLoc DL(N);
6666       SDValue VecOnes = DAG.getConstant(1, DL, VT);
6667       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
6668         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
6669         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
6670                                      N0.getOperand(1), N0.getOperand(2));
6671         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
6672       }
6673 
6674       // If the desired elements are smaller or larger than the source
6675       // elements we can use a matching integer vector type and then
6676       // truncate/sign extend.
6677       EVT MatchingElementType = EVT::getIntegerVT(
6678           *DAG.getContext(), N00VT.getScalarSizeInBits());
6679       EVT MatchingVectorType = EVT::getVectorVT(
6680           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
6681       SDValue VsetCC =
6682           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
6683                       N0.getOperand(1), N0.getOperand(2));
6684       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
6685                          VecOnes);
6686     }
6687 
6688     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6689     SDLoc DL(N);
6690     if (SDValue SCC = SimplifySelectCC(
6691             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6692             DAG.getConstant(0, DL, VT),
6693             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6694       return SCC;
6695   }
6696 
6697   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
6698   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
6699       isa<ConstantSDNode>(N0.getOperand(1)) &&
6700       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
6701       N0.hasOneUse()) {
6702     SDValue ShAmt = N0.getOperand(1);
6703     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
6704     if (N0.getOpcode() == ISD::SHL) {
6705       SDValue InnerZExt = N0.getOperand(0);
6706       // If the original shl may be shifting out bits, do not perform this
6707       // transformation.
6708       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
6709         InnerZExt.getOperand(0).getValueSizeInBits();
6710       if (ShAmtVal > KnownZeroBits)
6711         return SDValue();
6712     }
6713 
6714     SDLoc DL(N);
6715 
6716     // Ensure that the shift amount is wide enough for the shifted value.
6717     if (VT.getSizeInBits() >= 256)
6718       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
6719 
6720     return DAG.getNode(N0.getOpcode(), DL, VT,
6721                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
6722                        ShAmt);
6723   }
6724 
6725   return SDValue();
6726 }
6727 
6728 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
6729   SDValue N0 = N->getOperand(0);
6730   EVT VT = N->getValueType(0);
6731 
6732   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6733                                               LegalOperations))
6734     return SDValue(Res, 0);
6735 
6736   // fold (aext (aext x)) -> (aext x)
6737   // fold (aext (zext x)) -> (zext x)
6738   // fold (aext (sext x)) -> (sext x)
6739   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
6740       N0.getOpcode() == ISD::ZERO_EXTEND ||
6741       N0.getOpcode() == ISD::SIGN_EXTEND)
6742     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
6743 
6744   // fold (aext (truncate (load x))) -> (aext (smaller load x))
6745   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
6746   if (N0.getOpcode() == ISD::TRUNCATE) {
6747     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6748       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6749       if (NarrowLoad.getNode() != N0.getNode()) {
6750         CombineTo(N0.getNode(), NarrowLoad);
6751         // CombineTo deleted the truncate, if needed, but not what's under it.
6752         AddToWorklist(oye);
6753       }
6754       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6755     }
6756   }
6757 
6758   // fold (aext (truncate x))
6759   if (N0.getOpcode() == ISD::TRUNCATE) {
6760     SDValue TruncOp = N0.getOperand(0);
6761     if (TruncOp.getValueType() == VT)
6762       return TruncOp; // x iff x size == zext size.
6763     if (TruncOp.getValueType().bitsGT(VT))
6764       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
6765     return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
6766   }
6767 
6768   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
6769   // if the trunc is not free.
6770   if (N0.getOpcode() == ISD::AND &&
6771       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6772       N0.getOperand(1).getOpcode() == ISD::Constant &&
6773       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6774                           N0.getValueType())) {
6775     SDValue X = N0.getOperand(0).getOperand(0);
6776     if (X.getValueType().bitsLT(VT)) {
6777       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
6778     } else if (X.getValueType().bitsGT(VT)) {
6779       X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
6780     }
6781     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6782     Mask = Mask.zext(VT.getSizeInBits());
6783     SDLoc DL(N);
6784     return DAG.getNode(ISD::AND, DL, VT,
6785                        X, DAG.getConstant(Mask, DL, VT));
6786   }
6787 
6788   // fold (aext (load x)) -> (aext (truncate (extload x)))
6789   // None of the supported targets knows how to perform load and any_ext
6790   // on vectors in one instruction.  We only perform this transformation on
6791   // scalars.
6792   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
6793       ISD::isUNINDEXEDLoad(N0.getNode()) &&
6794       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
6795     bool DoXform = true;
6796     SmallVector<SDNode*, 4> SetCCs;
6797     if (!N0.hasOneUse())
6798       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
6799     if (DoXform) {
6800       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6801       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
6802                                        LN0->getChain(),
6803                                        LN0->getBasePtr(), N0.getValueType(),
6804                                        LN0->getMemOperand());
6805       CombineTo(N, ExtLoad);
6806       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6807                                   N0.getValueType(), ExtLoad);
6808       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6809       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6810                       ISD::ANY_EXTEND);
6811       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6812     }
6813   }
6814 
6815   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
6816   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
6817   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
6818   if (N0.getOpcode() == ISD::LOAD &&
6819       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6820       N0.hasOneUse()) {
6821     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6822     ISD::LoadExtType ExtType = LN0->getExtensionType();
6823     EVT MemVT = LN0->getMemoryVT();
6824     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
6825       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
6826                                        VT, LN0->getChain(), LN0->getBasePtr(),
6827                                        MemVT, LN0->getMemOperand());
6828       CombineTo(N, ExtLoad);
6829       CombineTo(N0.getNode(),
6830                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6831                             N0.getValueType(), ExtLoad),
6832                 ExtLoad.getValue(1));
6833       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6834     }
6835   }
6836 
6837   if (N0.getOpcode() == ISD::SETCC) {
6838     // For vectors:
6839     // aext(setcc) -> vsetcc
6840     // aext(setcc) -> truncate(vsetcc)
6841     // aext(setcc) -> aext(vsetcc)
6842     // Only do this before legalize for now.
6843     if (VT.isVector() && !LegalOperations) {
6844       EVT N0VT = N0.getOperand(0).getValueType();
6845         // We know that the # elements of the results is the same as the
6846         // # elements of the compare (and the # elements of the compare result
6847         // for that matter).  Check to see that they are the same size.  If so,
6848         // we know that the element size of the sext'd result matches the
6849         // element size of the compare operands.
6850       if (VT.getSizeInBits() == N0VT.getSizeInBits())
6851         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6852                              N0.getOperand(1),
6853                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
6854       // If the desired elements are smaller or larger than the source
6855       // elements we can use a matching integer vector type and then
6856       // truncate/any extend
6857       else {
6858         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6859         SDValue VsetCC =
6860           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
6861                         N0.getOperand(1),
6862                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
6863         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
6864       }
6865     }
6866 
6867     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6868     SDLoc DL(N);
6869     if (SDValue SCC = SimplifySelectCC(
6870             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6871             DAG.getConstant(0, DL, VT),
6872             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6873       return SCC;
6874   }
6875 
6876   return SDValue();
6877 }
6878 
6879 /// See if the specified operand can be simplified with the knowledge that only
6880 /// the bits specified by Mask are used.  If so, return the simpler operand,
6881 /// otherwise return a null SDValue.
6882 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
6883   switch (V.getOpcode()) {
6884   default: break;
6885   case ISD::Constant: {
6886     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
6887     assert(CV && "Const value should be ConstSDNode.");
6888     const APInt &CVal = CV->getAPIntValue();
6889     APInt NewVal = CVal & Mask;
6890     if (NewVal != CVal)
6891       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
6892     break;
6893   }
6894   case ISD::OR:
6895   case ISD::XOR:
6896     // If the LHS or RHS don't contribute bits to the or, drop them.
6897     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
6898       return V.getOperand(1);
6899     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
6900       return V.getOperand(0);
6901     break;
6902   case ISD::SRL:
6903     // Only look at single-use SRLs.
6904     if (!V.getNode()->hasOneUse())
6905       break;
6906     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
6907       // See if we can recursively simplify the LHS.
6908       unsigned Amt = RHSC->getZExtValue();
6909 
6910       // Watch out for shift count overflow though.
6911       if (Amt >= Mask.getBitWidth()) break;
6912       APInt NewMask = Mask << Amt;
6913       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
6914         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
6915                            SimplifyLHS, V.getOperand(1));
6916     }
6917   }
6918   return SDValue();
6919 }
6920 
6921 /// If the result of a wider load is shifted to right of N  bits and then
6922 /// truncated to a narrower type and where N is a multiple of number of bits of
6923 /// the narrower type, transform it to a narrower load from address + N / num of
6924 /// bits of new type. If the result is to be extended, also fold the extension
6925 /// to form a extending load.
6926 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
6927   unsigned Opc = N->getOpcode();
6928 
6929   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
6930   SDValue N0 = N->getOperand(0);
6931   EVT VT = N->getValueType(0);
6932   EVT ExtVT = VT;
6933 
6934   // This transformation isn't valid for vector loads.
6935   if (VT.isVector())
6936     return SDValue();
6937 
6938   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
6939   // extended to VT.
6940   if (Opc == ISD::SIGN_EXTEND_INREG) {
6941     ExtType = ISD::SEXTLOAD;
6942     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6943   } else if (Opc == ISD::SRL) {
6944     // Another special-case: SRL is basically zero-extending a narrower value.
6945     ExtType = ISD::ZEXTLOAD;
6946     N0 = SDValue(N, 0);
6947     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6948     if (!N01) return SDValue();
6949     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
6950                               VT.getSizeInBits() - N01->getZExtValue());
6951   }
6952   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
6953     return SDValue();
6954 
6955   unsigned EVTBits = ExtVT.getSizeInBits();
6956 
6957   // Do not generate loads of non-round integer types since these can
6958   // be expensive (and would be wrong if the type is not byte sized).
6959   if (!ExtVT.isRound())
6960     return SDValue();
6961 
6962   unsigned ShAmt = 0;
6963   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
6964     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6965       ShAmt = N01->getZExtValue();
6966       // Is the shift amount a multiple of size of VT?
6967       if ((ShAmt & (EVTBits-1)) == 0) {
6968         N0 = N0.getOperand(0);
6969         // Is the load width a multiple of size of VT?
6970         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
6971           return SDValue();
6972       }
6973 
6974       // At this point, we must have a load or else we can't do the transform.
6975       if (!isa<LoadSDNode>(N0)) return SDValue();
6976 
6977       // Because a SRL must be assumed to *need* to zero-extend the high bits
6978       // (as opposed to anyext the high bits), we can't combine the zextload
6979       // lowering of SRL and an sextload.
6980       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
6981         return SDValue();
6982 
6983       // If the shift amount is larger than the input type then we're not
6984       // accessing any of the loaded bytes.  If the load was a zextload/extload
6985       // then the result of the shift+trunc is zero/undef (handled elsewhere).
6986       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
6987         return SDValue();
6988     }
6989   }
6990 
6991   // If the load is shifted left (and the result isn't shifted back right),
6992   // we can fold the truncate through the shift.
6993   unsigned ShLeftAmt = 0;
6994   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
6995       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
6996     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6997       ShLeftAmt = N01->getZExtValue();
6998       N0 = N0.getOperand(0);
6999     }
7000   }
7001 
7002   // If we haven't found a load, we can't narrow it.  Don't transform one with
7003   // multiple uses, this would require adding a new load.
7004   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
7005     return SDValue();
7006 
7007   // Don't change the width of a volatile load.
7008   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7009   if (LN0->isVolatile())
7010     return SDValue();
7011 
7012   // Verify that we are actually reducing a load width here.
7013   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
7014     return SDValue();
7015 
7016   // For the transform to be legal, the load must produce only two values
7017   // (the value loaded and the chain).  Don't transform a pre-increment
7018   // load, for example, which produces an extra value.  Otherwise the
7019   // transformation is not equivalent, and the downstream logic to replace
7020   // uses gets things wrong.
7021   if (LN0->getNumValues() > 2)
7022     return SDValue();
7023 
7024   // If the load that we're shrinking is an extload and we're not just
7025   // discarding the extension we can't simply shrink the load. Bail.
7026   // TODO: It would be possible to merge the extensions in some cases.
7027   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
7028       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
7029     return SDValue();
7030 
7031   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
7032     return SDValue();
7033 
7034   EVT PtrType = N0.getOperand(1).getValueType();
7035 
7036   if (PtrType == MVT::Untyped || PtrType.isExtended())
7037     // It's not possible to generate a constant of extended or untyped type.
7038     return SDValue();
7039 
7040   // For big endian targets, we need to adjust the offset to the pointer to
7041   // load the correct bytes.
7042   if (DAG.getDataLayout().isBigEndian()) {
7043     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
7044     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
7045     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
7046   }
7047 
7048   uint64_t PtrOff = ShAmt / 8;
7049   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
7050   SDLoc DL(LN0);
7051   // The original load itself didn't wrap, so an offset within it doesn't.
7052   SDNodeFlags Flags;
7053   Flags.setNoUnsignedWrap(true);
7054   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
7055                                PtrType, LN0->getBasePtr(),
7056                                DAG.getConstant(PtrOff, DL, PtrType),
7057                                &Flags);
7058   AddToWorklist(NewPtr.getNode());
7059 
7060   SDValue Load;
7061   if (ExtType == ISD::NON_EXTLOAD)
7062     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
7063                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
7064                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7065   else
7066     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
7067                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
7068                           NewAlign, LN0->getMemOperand()->getFlags(),
7069                           LN0->getAAInfo());
7070 
7071   // Replace the old load's chain with the new load's chain.
7072   WorklistRemover DeadNodes(*this);
7073   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7074 
7075   // Shift the result left, if we've swallowed a left shift.
7076   SDValue Result = Load;
7077   if (ShLeftAmt != 0) {
7078     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
7079     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
7080       ShImmTy = VT;
7081     // If the shift amount is as large as the result size (but, presumably,
7082     // no larger than the source) then the useful bits of the result are
7083     // zero; we can't simply return the shortened shift, because the result
7084     // of that operation is undefined.
7085     SDLoc DL(N0);
7086     if (ShLeftAmt >= VT.getSizeInBits())
7087       Result = DAG.getConstant(0, DL, VT);
7088     else
7089       Result = DAG.getNode(ISD::SHL, DL, VT,
7090                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
7091   }
7092 
7093   // Return the new loaded value.
7094   return Result;
7095 }
7096 
7097 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
7098   SDValue N0 = N->getOperand(0);
7099   SDValue N1 = N->getOperand(1);
7100   EVT VT = N->getValueType(0);
7101   EVT EVT = cast<VTSDNode>(N1)->getVT();
7102   unsigned VTBits = VT.getScalarSizeInBits();
7103   unsigned EVTBits = EVT.getScalarSizeInBits();
7104 
7105   if (N0.isUndef())
7106     return DAG.getUNDEF(VT);
7107 
7108   // fold (sext_in_reg c1) -> c1
7109   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7110     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
7111 
7112   // If the input is already sign extended, just drop the extension.
7113   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
7114     return N0;
7115 
7116   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
7117   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
7118       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
7119     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7120                        N0.getOperand(0), N1);
7121 
7122   // fold (sext_in_reg (sext x)) -> (sext x)
7123   // fold (sext_in_reg (aext x)) -> (sext x)
7124   // if x is small enough.
7125   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
7126     SDValue N00 = N0.getOperand(0);
7127     if (N00.getScalarValueSizeInBits() <= EVTBits &&
7128         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7129       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7130   }
7131 
7132   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
7133   if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
7134     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
7135 
7136   // fold operands of sext_in_reg based on knowledge that the top bits are not
7137   // demanded.
7138   if (SimplifyDemandedBits(SDValue(N, 0)))
7139     return SDValue(N, 0);
7140 
7141   // fold (sext_in_reg (load x)) -> (smaller sextload x)
7142   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
7143   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7144     return NarrowLoad;
7145 
7146   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
7147   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
7148   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
7149   if (N0.getOpcode() == ISD::SRL) {
7150     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
7151       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
7152         // We can turn this into an SRA iff the input to the SRL is already sign
7153         // extended enough.
7154         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
7155         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
7156           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
7157                              N0.getOperand(0), N0.getOperand(1));
7158       }
7159   }
7160 
7161   // fold (sext_inreg (extload x)) -> (sextload x)
7162   if (ISD::isEXTLoad(N0.getNode()) &&
7163       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7164       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7165       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7166        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7167     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7168     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7169                                      LN0->getChain(),
7170                                      LN0->getBasePtr(), EVT,
7171                                      LN0->getMemOperand());
7172     CombineTo(N, ExtLoad);
7173     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7174     AddToWorklist(ExtLoad.getNode());
7175     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7176   }
7177   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
7178   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7179       N0.hasOneUse() &&
7180       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7181       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7182        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7183     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7184     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7185                                      LN0->getChain(),
7186                                      LN0->getBasePtr(), EVT,
7187                                      LN0->getMemOperand());
7188     CombineTo(N, ExtLoad);
7189     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7190     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7191   }
7192 
7193   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
7194   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
7195     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7196                                            N0.getOperand(1), false))
7197       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7198                          BSwap, N1);
7199   }
7200 
7201   return SDValue();
7202 }
7203 
7204 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
7205   SDValue N0 = N->getOperand(0);
7206   EVT VT = N->getValueType(0);
7207 
7208   if (N0.isUndef())
7209     return DAG.getUNDEF(VT);
7210 
7211   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7212                                               LegalOperations))
7213     return SDValue(Res, 0);
7214 
7215   return SDValue();
7216 }
7217 
7218 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
7219   SDValue N0 = N->getOperand(0);
7220   EVT VT = N->getValueType(0);
7221 
7222   if (N0.isUndef())
7223     return DAG.getUNDEF(VT);
7224 
7225   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7226                                               LegalOperations))
7227     return SDValue(Res, 0);
7228 
7229   return SDValue();
7230 }
7231 
7232 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
7233   SDValue N0 = N->getOperand(0);
7234   EVT VT = N->getValueType(0);
7235   bool isLE = DAG.getDataLayout().isLittleEndian();
7236 
7237   // noop truncate
7238   if (N0.getValueType() == N->getValueType(0))
7239     return N0;
7240   // fold (truncate c1) -> c1
7241   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7242     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
7243   // fold (truncate (truncate x)) -> (truncate x)
7244   if (N0.getOpcode() == ISD::TRUNCATE)
7245     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7246   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
7247   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
7248       N0.getOpcode() == ISD::SIGN_EXTEND ||
7249       N0.getOpcode() == ISD::ANY_EXTEND) {
7250     // if the source is smaller than the dest, we still need an extend.
7251     if (N0.getOperand(0).getValueType().bitsLT(VT))
7252       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7253     // if the source is larger than the dest, than we just need the truncate.
7254     if (N0.getOperand(0).getValueType().bitsGT(VT))
7255       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7256     // if the source and dest are the same type, we can drop both the extend
7257     // and the truncate.
7258     return N0.getOperand(0);
7259   }
7260 
7261   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
7262   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
7263     return SDValue();
7264 
7265   // Fold extract-and-trunc into a narrow extract. For example:
7266   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
7267   //   i32 y = TRUNCATE(i64 x)
7268   //        -- becomes --
7269   //   v16i8 b = BITCAST (v2i64 val)
7270   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
7271   //
7272   // Note: We only run this optimization after type legalization (which often
7273   // creates this pattern) and before operation legalization after which
7274   // we need to be more careful about the vector instructions that we generate.
7275   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7276       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
7277 
7278     EVT VecTy = N0.getOperand(0).getValueType();
7279     EVT ExTy = N0.getValueType();
7280     EVT TrTy = N->getValueType(0);
7281 
7282     unsigned NumElem = VecTy.getVectorNumElements();
7283     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
7284 
7285     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
7286     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
7287 
7288     SDValue EltNo = N0->getOperand(1);
7289     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
7290       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
7291       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7292       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
7293 
7294       SDLoc DL(N);
7295       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
7296                          DAG.getBitcast(NVT, N0.getOperand(0)),
7297                          DAG.getConstant(Index, DL, IndexTy));
7298     }
7299   }
7300 
7301   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
7302   if (N0.getOpcode() == ISD::SELECT) {
7303     EVT SrcVT = N0.getValueType();
7304     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
7305         TLI.isTruncateFree(SrcVT, VT)) {
7306       SDLoc SL(N0);
7307       SDValue Cond = N0.getOperand(0);
7308       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
7309       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
7310       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
7311     }
7312   }
7313 
7314   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
7315   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7316       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
7317       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
7318     if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
7319       uint64_t Amt = CAmt->getZExtValue();
7320       unsigned Size = VT.getScalarSizeInBits();
7321 
7322       if (Amt < Size) {
7323         SDLoc SL(N);
7324         EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
7325 
7326         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
7327         return DAG.getNode(ISD::SHL, SL, VT, Trunc,
7328                            DAG.getConstant(Amt, SL, AmtVT));
7329       }
7330     }
7331   }
7332 
7333   // Fold a series of buildvector, bitcast, and truncate if possible.
7334   // For example fold
7335   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
7336   //   (2xi32 (buildvector x, y)).
7337   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
7338       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
7339       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
7340       N0.getOperand(0).hasOneUse()) {
7341 
7342     SDValue BuildVect = N0.getOperand(0);
7343     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
7344     EVT TruncVecEltTy = VT.getVectorElementType();
7345 
7346     // Check that the element types match.
7347     if (BuildVectEltTy == TruncVecEltTy) {
7348       // Now we only need to compute the offset of the truncated elements.
7349       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
7350       unsigned TruncVecNumElts = VT.getVectorNumElements();
7351       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
7352 
7353       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
7354              "Invalid number of elements");
7355 
7356       SmallVector<SDValue, 8> Opnds;
7357       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
7358         Opnds.push_back(BuildVect.getOperand(i));
7359 
7360       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
7361     }
7362   }
7363 
7364   // See if we can simplify the input to this truncate through knowledge that
7365   // only the low bits are being used.
7366   // For example "trunc (or (shl x, 8), y)" // -> trunc y
7367   // Currently we only perform this optimization on scalars because vectors
7368   // may have different active low bits.
7369   if (!VT.isVector()) {
7370     if (SDValue Shorter =
7371             GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
7372                                                      VT.getSizeInBits())))
7373       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
7374   }
7375   // fold (truncate (load x)) -> (smaller load x)
7376   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
7377   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
7378     if (SDValue Reduced = ReduceLoadWidth(N))
7379       return Reduced;
7380 
7381     // Handle the case where the load remains an extending load even
7382     // after truncation.
7383     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
7384       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7385       if (!LN0->isVolatile() &&
7386           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
7387         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
7388                                          VT, LN0->getChain(), LN0->getBasePtr(),
7389                                          LN0->getMemoryVT(),
7390                                          LN0->getMemOperand());
7391         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
7392         return NewLoad;
7393       }
7394     }
7395   }
7396   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
7397   // where ... are all 'undef'.
7398   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
7399     SmallVector<EVT, 8> VTs;
7400     SDValue V;
7401     unsigned Idx = 0;
7402     unsigned NumDefs = 0;
7403 
7404     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
7405       SDValue X = N0.getOperand(i);
7406       if (!X.isUndef()) {
7407         V = X;
7408         Idx = i;
7409         NumDefs++;
7410       }
7411       // Stop if more than one members are non-undef.
7412       if (NumDefs > 1)
7413         break;
7414       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
7415                                      VT.getVectorElementType(),
7416                                      X.getValueType().getVectorNumElements()));
7417     }
7418 
7419     if (NumDefs == 0)
7420       return DAG.getUNDEF(VT);
7421 
7422     if (NumDefs == 1) {
7423       assert(V.getNode() && "The single defined operand is empty!");
7424       SmallVector<SDValue, 8> Opnds;
7425       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
7426         if (i != Idx) {
7427           Opnds.push_back(DAG.getUNDEF(VTs[i]));
7428           continue;
7429         }
7430         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
7431         AddToWorklist(NV.getNode());
7432         Opnds.push_back(NV);
7433       }
7434       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
7435     }
7436   }
7437 
7438   // Fold truncate of a bitcast of a vector to an extract of the low vector
7439   // element.
7440   //
7441   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
7442   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
7443     SDValue VecSrc = N0.getOperand(0);
7444     EVT SrcVT = VecSrc.getValueType();
7445     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
7446         (!LegalOperations ||
7447          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
7448       SDLoc SL(N);
7449 
7450       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7451       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
7452                          VecSrc, DAG.getConstant(0, SL, IdxVT));
7453     }
7454   }
7455 
7456   // Simplify the operands using demanded-bits information.
7457   if (!VT.isVector() &&
7458       SimplifyDemandedBits(SDValue(N, 0)))
7459     return SDValue(N, 0);
7460 
7461   return SDValue();
7462 }
7463 
7464 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
7465   SDValue Elt = N->getOperand(i);
7466   if (Elt.getOpcode() != ISD::MERGE_VALUES)
7467     return Elt.getNode();
7468   return Elt.getOperand(Elt.getResNo()).getNode();
7469 }
7470 
7471 /// build_pair (load, load) -> load
7472 /// if load locations are consecutive.
7473 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
7474   assert(N->getOpcode() == ISD::BUILD_PAIR);
7475 
7476   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
7477   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
7478   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
7479       LD1->getAddressSpace() != LD2->getAddressSpace())
7480     return SDValue();
7481   EVT LD1VT = LD1->getValueType(0);
7482   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
7483   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
7484       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
7485     unsigned Align = LD1->getAlignment();
7486     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
7487         VT.getTypeForEVT(*DAG.getContext()));
7488 
7489     if (NewAlign <= Align &&
7490         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
7491       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
7492                          LD1->getPointerInfo(), Align);
7493   }
7494 
7495   return SDValue();
7496 }
7497 
7498 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
7499   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
7500   // and Lo parts; on big-endian machines it doesn't.
7501   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
7502 }
7503 
7504 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
7505                                     const TargetLowering &TLI) {
7506   // If this is not a bitcast to an FP type or if the target doesn't have
7507   // IEEE754-compliant FP logic, we're done.
7508   EVT VT = N->getValueType(0);
7509   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
7510     return SDValue();
7511 
7512   // TODO: Use splat values for the constant-checking below and remove this
7513   // restriction.
7514   SDValue N0 = N->getOperand(0);
7515   EVT SourceVT = N0.getValueType();
7516   if (SourceVT.isVector())
7517     return SDValue();
7518 
7519   unsigned FPOpcode;
7520   APInt SignMask;
7521   switch (N0.getOpcode()) {
7522   case ISD::AND:
7523     FPOpcode = ISD::FABS;
7524     SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
7525     break;
7526   case ISD::XOR:
7527     FPOpcode = ISD::FNEG;
7528     SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
7529     break;
7530   // TODO: ISD::OR --> ISD::FNABS?
7531   default:
7532     return SDValue();
7533   }
7534 
7535   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
7536   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
7537   SDValue LogicOp0 = N0.getOperand(0);
7538   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7539   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
7540       LogicOp0.getOpcode() == ISD::BITCAST &&
7541       LogicOp0->getOperand(0).getValueType() == VT)
7542     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
7543 
7544   return SDValue();
7545 }
7546 
7547 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
7548   SDValue N0 = N->getOperand(0);
7549   EVT VT = N->getValueType(0);
7550 
7551   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
7552   // Only do this before legalize, since afterward the target may be depending
7553   // on the bitconvert.
7554   // First check to see if this is all constant.
7555   if (!LegalTypes &&
7556       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
7557       VT.isVector()) {
7558     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
7559 
7560     EVT DestEltVT = N->getValueType(0).getVectorElementType();
7561     assert(!DestEltVT.isVector() &&
7562            "Element type of vector ValueType must not be vector!");
7563     if (isSimple)
7564       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
7565   }
7566 
7567   // If the input is a constant, let getNode fold it.
7568   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
7569     // If we can't allow illegal operations, we need to check that this is just
7570     // a fp -> int or int -> conversion and that the resulting operation will
7571     // be legal.
7572     if (!LegalOperations ||
7573         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
7574          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
7575         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
7576          TLI.isOperationLegal(ISD::Constant, VT)))
7577       return DAG.getBitcast(VT, N0);
7578   }
7579 
7580   // (conv (conv x, t1), t2) -> (conv x, t2)
7581   if (N0.getOpcode() == ISD::BITCAST)
7582     return DAG.getBitcast(VT, N0.getOperand(0));
7583 
7584   // fold (conv (load x)) -> (load (conv*)x)
7585   // If the resultant load doesn't need a higher alignment than the original!
7586   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
7587       // Do not change the width of a volatile load.
7588       !cast<LoadSDNode>(N0)->isVolatile() &&
7589       // Do not remove the cast if the types differ in endian layout.
7590       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
7591           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
7592       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
7593       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
7594     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7595     unsigned OrigAlign = LN0->getAlignment();
7596 
7597     bool Fast = false;
7598     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
7599                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
7600         Fast) {
7601       SDValue Load =
7602           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
7603                       LN0->getPointerInfo(), OrigAlign,
7604                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7605       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7606       return Load;
7607     }
7608   }
7609 
7610   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
7611     return V;
7612 
7613   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7614   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7615   //
7616   // For ppc_fp128:
7617   // fold (bitcast (fneg x)) ->
7618   //     flipbit = signbit
7619   //     (xor (bitcast x) (build_pair flipbit, flipbit))
7620   //
7621   // fold (bitcast (fabs x)) ->
7622   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
7623   //     (xor (bitcast x) (build_pair flipbit, flipbit))
7624   // This often reduces constant pool loads.
7625   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
7626        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
7627       N0.getNode()->hasOneUse() && VT.isInteger() &&
7628       !VT.isVector() && !N0.getValueType().isVector()) {
7629     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
7630     AddToWorklist(NewConv.getNode());
7631 
7632     SDLoc DL(N);
7633     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
7634       assert(VT.getSizeInBits() == 128);
7635       SDValue SignBit = DAG.getConstant(
7636           APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
7637       SDValue FlipBit;
7638       if (N0.getOpcode() == ISD::FNEG) {
7639         FlipBit = SignBit;
7640         AddToWorklist(FlipBit.getNode());
7641       } else {
7642         assert(N0.getOpcode() == ISD::FABS);
7643         SDValue Hi =
7644             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
7645                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
7646                                               SDLoc(NewConv)));
7647         AddToWorklist(Hi.getNode());
7648         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
7649         AddToWorklist(FlipBit.getNode());
7650       }
7651       SDValue FlipBits =
7652           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
7653       AddToWorklist(FlipBits.getNode());
7654       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
7655     }
7656     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7657     if (N0.getOpcode() == ISD::FNEG)
7658       return DAG.getNode(ISD::XOR, DL, VT,
7659                          NewConv, DAG.getConstant(SignBit, DL, VT));
7660     assert(N0.getOpcode() == ISD::FABS);
7661     return DAG.getNode(ISD::AND, DL, VT,
7662                        NewConv, DAG.getConstant(~SignBit, DL, VT));
7663   }
7664 
7665   // fold (bitconvert (fcopysign cst, x)) ->
7666   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
7667   // Note that we don't handle (copysign x, cst) because this can always be
7668   // folded to an fneg or fabs.
7669   //
7670   // For ppc_fp128:
7671   // fold (bitcast (fcopysign cst, x)) ->
7672   //     flipbit = (and (extract_element
7673   //                     (xor (bitcast cst), (bitcast x)), 0),
7674   //                    signbit)
7675   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
7676   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
7677       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
7678       VT.isInteger() && !VT.isVector()) {
7679     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
7680     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
7681     if (isTypeLegal(IntXVT)) {
7682       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
7683       AddToWorklist(X.getNode());
7684 
7685       // If X has a different width than the result/lhs, sext it or truncate it.
7686       unsigned VTWidth = VT.getSizeInBits();
7687       if (OrigXWidth < VTWidth) {
7688         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
7689         AddToWorklist(X.getNode());
7690       } else if (OrigXWidth > VTWidth) {
7691         // To get the sign bit in the right place, we have to shift it right
7692         // before truncating.
7693         SDLoc DL(X);
7694         X = DAG.getNode(ISD::SRL, DL,
7695                         X.getValueType(), X,
7696                         DAG.getConstant(OrigXWidth-VTWidth, DL,
7697                                         X.getValueType()));
7698         AddToWorklist(X.getNode());
7699         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7700         AddToWorklist(X.getNode());
7701       }
7702 
7703       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
7704         APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
7705         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
7706         AddToWorklist(Cst.getNode());
7707         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
7708         AddToWorklist(X.getNode());
7709         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
7710         AddToWorklist(XorResult.getNode());
7711         SDValue XorResult64 = DAG.getNode(
7712             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
7713             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
7714                                   SDLoc(XorResult)));
7715         AddToWorklist(XorResult64.getNode());
7716         SDValue FlipBit =
7717             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
7718                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
7719         AddToWorklist(FlipBit.getNode());
7720         SDValue FlipBits =
7721             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
7722         AddToWorklist(FlipBits.getNode());
7723         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
7724       }
7725       APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7726       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
7727                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
7728       AddToWorklist(X.getNode());
7729 
7730       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
7731       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
7732                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
7733       AddToWorklist(Cst.getNode());
7734 
7735       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
7736     }
7737   }
7738 
7739   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
7740   if (N0.getOpcode() == ISD::BUILD_PAIR)
7741     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
7742       return CombineLD;
7743 
7744   // Remove double bitcasts from shuffles - this is often a legacy of
7745   // XformToShuffleWithZero being used to combine bitmaskings (of
7746   // float vectors bitcast to integer vectors) into shuffles.
7747   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
7748   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
7749       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
7750       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
7751       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
7752     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
7753 
7754     // If operands are a bitcast, peek through if it casts the original VT.
7755     // If operands are a constant, just bitcast back to original VT.
7756     auto PeekThroughBitcast = [&](SDValue Op) {
7757       if (Op.getOpcode() == ISD::BITCAST &&
7758           Op.getOperand(0).getValueType() == VT)
7759         return SDValue(Op.getOperand(0));
7760       if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
7761           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
7762         return DAG.getBitcast(VT, Op);
7763       return SDValue();
7764     };
7765 
7766     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
7767     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
7768     if (!(SV0 && SV1))
7769       return SDValue();
7770 
7771     int MaskScale =
7772         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
7773     SmallVector<int, 8> NewMask;
7774     for (int M : SVN->getMask())
7775       for (int i = 0; i != MaskScale; ++i)
7776         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
7777 
7778     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7779     if (!LegalMask) {
7780       std::swap(SV0, SV1);
7781       ShuffleVectorSDNode::commuteMask(NewMask);
7782       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7783     }
7784 
7785     if (LegalMask)
7786       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
7787   }
7788 
7789   return SDValue();
7790 }
7791 
7792 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
7793   EVT VT = N->getValueType(0);
7794   return CombineConsecutiveLoads(N, VT);
7795 }
7796 
7797 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
7798 /// operands. DstEltVT indicates the destination element value type.
7799 SDValue DAGCombiner::
7800 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
7801   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
7802 
7803   // If this is already the right type, we're done.
7804   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
7805 
7806   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
7807   unsigned DstBitSize = DstEltVT.getSizeInBits();
7808 
7809   // If this is a conversion of N elements of one type to N elements of another
7810   // type, convert each element.  This handles FP<->INT cases.
7811   if (SrcBitSize == DstBitSize) {
7812     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7813                               BV->getValueType(0).getVectorNumElements());
7814 
7815     // Due to the FP element handling below calling this routine recursively,
7816     // we can end up with a scalar-to-vector node here.
7817     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
7818       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
7819                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
7820 
7821     SmallVector<SDValue, 8> Ops;
7822     for (SDValue Op : BV->op_values()) {
7823       // If the vector element type is not legal, the BUILD_VECTOR operands
7824       // are promoted and implicitly truncated.  Make that explicit here.
7825       if (Op.getValueType() != SrcEltVT)
7826         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
7827       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
7828       AddToWorklist(Ops.back().getNode());
7829     }
7830     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
7831   }
7832 
7833   // Otherwise, we're growing or shrinking the elements.  To avoid having to
7834   // handle annoying details of growing/shrinking FP values, we convert them to
7835   // int first.
7836   if (SrcEltVT.isFloatingPoint()) {
7837     // Convert the input float vector to a int vector where the elements are the
7838     // same sizes.
7839     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
7840     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
7841     SrcEltVT = IntVT;
7842   }
7843 
7844   // Now we know the input is an integer vector.  If the output is a FP type,
7845   // convert to integer first, then to FP of the right size.
7846   if (DstEltVT.isFloatingPoint()) {
7847     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
7848     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
7849 
7850     // Next, convert to FP elements of the same size.
7851     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
7852   }
7853 
7854   SDLoc DL(BV);
7855 
7856   // Okay, we know the src/dst types are both integers of differing types.
7857   // Handling growing first.
7858   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
7859   if (SrcBitSize < DstBitSize) {
7860     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
7861 
7862     SmallVector<SDValue, 8> Ops;
7863     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
7864          i += NumInputsPerOutput) {
7865       bool isLE = DAG.getDataLayout().isLittleEndian();
7866       APInt NewBits = APInt(DstBitSize, 0);
7867       bool EltIsUndef = true;
7868       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
7869         // Shift the previously computed bits over.
7870         NewBits <<= SrcBitSize;
7871         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
7872         if (Op.isUndef()) continue;
7873         EltIsUndef = false;
7874 
7875         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
7876                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
7877       }
7878 
7879       if (EltIsUndef)
7880         Ops.push_back(DAG.getUNDEF(DstEltVT));
7881       else
7882         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
7883     }
7884 
7885     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
7886     return DAG.getBuildVector(VT, DL, Ops);
7887   }
7888 
7889   // Finally, this must be the case where we are shrinking elements: each input
7890   // turns into multiple outputs.
7891   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
7892   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7893                             NumOutputsPerInput*BV->getNumOperands());
7894   SmallVector<SDValue, 8> Ops;
7895 
7896   for (const SDValue &Op : BV->op_values()) {
7897     if (Op.isUndef()) {
7898       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
7899       continue;
7900     }
7901 
7902     APInt OpVal = cast<ConstantSDNode>(Op)->
7903                   getAPIntValue().zextOrTrunc(SrcBitSize);
7904 
7905     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
7906       APInt ThisVal = OpVal.trunc(DstBitSize);
7907       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
7908       OpVal = OpVal.lshr(DstBitSize);
7909     }
7910 
7911     // For big endian targets, swap the order of the pieces of each element.
7912     if (DAG.getDataLayout().isBigEndian())
7913       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
7914   }
7915 
7916   return DAG.getBuildVector(VT, DL, Ops);
7917 }
7918 
7919 /// Try to perform FMA combining on a given FADD node.
7920 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
7921   SDValue N0 = N->getOperand(0);
7922   SDValue N1 = N->getOperand(1);
7923   EVT VT = N->getValueType(0);
7924   SDLoc SL(N);
7925 
7926   const TargetOptions &Options = DAG.getTarget().Options;
7927   bool AllowFusion =
7928       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
7929 
7930   // Floating-point multiply-add with intermediate rounding.
7931   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
7932 
7933   // Floating-point multiply-add without intermediate rounding.
7934   bool HasFMA =
7935       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
7936       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
7937 
7938   // No valid opcode, do not combine.
7939   if (!HasFMAD && !HasFMA)
7940     return SDValue();
7941 
7942   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
7943   ;
7944   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
7945     return SDValue();
7946 
7947   // Always prefer FMAD to FMA for precision.
7948   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
7949   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
7950   bool LookThroughFPExt = TLI.isFPExtFree(VT);
7951 
7952   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
7953   // prefer to fold the multiply with fewer uses.
7954   if (Aggressive && N0.getOpcode() == ISD::FMUL &&
7955       N1.getOpcode() == ISD::FMUL) {
7956     if (N0.getNode()->use_size() > N1.getNode()->use_size())
7957       std::swap(N0, N1);
7958   }
7959 
7960   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
7961   if (N0.getOpcode() == ISD::FMUL &&
7962       (Aggressive || N0->hasOneUse())) {
7963     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7964                        N0.getOperand(0), N0.getOperand(1), N1);
7965   }
7966 
7967   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
7968   // Note: Commutes FADD operands.
7969   if (N1.getOpcode() == ISD::FMUL &&
7970       (Aggressive || N1->hasOneUse())) {
7971     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7972                        N1.getOperand(0), N1.getOperand(1), N0);
7973   }
7974 
7975   // Look through FP_EXTEND nodes to do more combining.
7976   if (AllowFusion && LookThroughFPExt) {
7977     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
7978     if (N0.getOpcode() == ISD::FP_EXTEND) {
7979       SDValue N00 = N0.getOperand(0);
7980       if (N00.getOpcode() == ISD::FMUL)
7981         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7982                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7983                                        N00.getOperand(0)),
7984                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7985                                        N00.getOperand(1)), N1);
7986     }
7987 
7988     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
7989     // Note: Commutes FADD operands.
7990     if (N1.getOpcode() == ISD::FP_EXTEND) {
7991       SDValue N10 = N1.getOperand(0);
7992       if (N10.getOpcode() == ISD::FMUL)
7993         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7994                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7995                                        N10.getOperand(0)),
7996                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7997                                        N10.getOperand(1)), N0);
7998     }
7999   }
8000 
8001   // More folding opportunities when target permits.
8002   if ((AllowFusion || HasFMAD)  && Aggressive) {
8003     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
8004     if (N0.getOpcode() == PreferredFusedOpcode &&
8005         N0.getOperand(2).getOpcode() == ISD::FMUL) {
8006       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8007                          N0.getOperand(0), N0.getOperand(1),
8008                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8009                                      N0.getOperand(2).getOperand(0),
8010                                      N0.getOperand(2).getOperand(1),
8011                                      N1));
8012     }
8013 
8014     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
8015     if (N1->getOpcode() == PreferredFusedOpcode &&
8016         N1.getOperand(2).getOpcode() == ISD::FMUL) {
8017       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8018                          N1.getOperand(0), N1.getOperand(1),
8019                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8020                                      N1.getOperand(2).getOperand(0),
8021                                      N1.getOperand(2).getOperand(1),
8022                                      N0));
8023     }
8024 
8025     if (AllowFusion && LookThroughFPExt) {
8026       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
8027       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
8028       auto FoldFAddFMAFPExtFMul = [&] (
8029           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8030         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
8031                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8032                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8033                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8034                                        Z));
8035       };
8036       if (N0.getOpcode() == PreferredFusedOpcode) {
8037         SDValue N02 = N0.getOperand(2);
8038         if (N02.getOpcode() == ISD::FP_EXTEND) {
8039           SDValue N020 = N02.getOperand(0);
8040           if (N020.getOpcode() == ISD::FMUL)
8041             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
8042                                         N020.getOperand(0), N020.getOperand(1),
8043                                         N1);
8044         }
8045       }
8046 
8047       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
8048       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
8049       // FIXME: This turns two single-precision and one double-precision
8050       // operation into two double-precision operations, which might not be
8051       // interesting for all targets, especially GPUs.
8052       auto FoldFAddFPExtFMAFMul = [&] (
8053           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8054         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8055                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
8056                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
8057                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8058                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8059                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8060                                        Z));
8061       };
8062       if (N0.getOpcode() == ISD::FP_EXTEND) {
8063         SDValue N00 = N0.getOperand(0);
8064         if (N00.getOpcode() == PreferredFusedOpcode) {
8065           SDValue N002 = N00.getOperand(2);
8066           if (N002.getOpcode() == ISD::FMUL)
8067             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
8068                                         N002.getOperand(0), N002.getOperand(1),
8069                                         N1);
8070         }
8071       }
8072 
8073       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
8074       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
8075       if (N1.getOpcode() == PreferredFusedOpcode) {
8076         SDValue N12 = N1.getOperand(2);
8077         if (N12.getOpcode() == ISD::FP_EXTEND) {
8078           SDValue N120 = N12.getOperand(0);
8079           if (N120.getOpcode() == ISD::FMUL)
8080             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
8081                                         N120.getOperand(0), N120.getOperand(1),
8082                                         N0);
8083         }
8084       }
8085 
8086       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
8087       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
8088       // FIXME: This turns two single-precision and one double-precision
8089       // operation into two double-precision operations, which might not be
8090       // interesting for all targets, especially GPUs.
8091       if (N1.getOpcode() == ISD::FP_EXTEND) {
8092         SDValue N10 = N1.getOperand(0);
8093         if (N10.getOpcode() == PreferredFusedOpcode) {
8094           SDValue N102 = N10.getOperand(2);
8095           if (N102.getOpcode() == ISD::FMUL)
8096             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
8097                                         N102.getOperand(0), N102.getOperand(1),
8098                                         N0);
8099         }
8100       }
8101     }
8102   }
8103 
8104   return SDValue();
8105 }
8106 
8107 /// Try to perform FMA combining on a given FSUB node.
8108 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
8109   SDValue N0 = N->getOperand(0);
8110   SDValue N1 = N->getOperand(1);
8111   EVT VT = N->getValueType(0);
8112   SDLoc SL(N);
8113 
8114   const TargetOptions &Options = DAG.getTarget().Options;
8115   bool AllowFusion =
8116       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8117 
8118   // Floating-point multiply-add with intermediate rounding.
8119   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8120 
8121   // Floating-point multiply-add without intermediate rounding.
8122   bool HasFMA =
8123       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8124       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8125 
8126   // No valid opcode, do not combine.
8127   if (!HasFMAD && !HasFMA)
8128     return SDValue();
8129 
8130   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8131   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
8132     return SDValue();
8133 
8134   // Always prefer FMAD to FMA for precision.
8135   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8136   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8137   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8138 
8139   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
8140   if (N0.getOpcode() == ISD::FMUL &&
8141       (Aggressive || N0->hasOneUse())) {
8142     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8143                        N0.getOperand(0), N0.getOperand(1),
8144                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8145   }
8146 
8147   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
8148   // Note: Commutes FSUB operands.
8149   if (N1.getOpcode() == ISD::FMUL &&
8150       (Aggressive || N1->hasOneUse()))
8151     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8152                        DAG.getNode(ISD::FNEG, SL, VT,
8153                                    N1.getOperand(0)),
8154                        N1.getOperand(1), N0);
8155 
8156   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
8157   if (N0.getOpcode() == ISD::FNEG &&
8158       N0.getOperand(0).getOpcode() == ISD::FMUL &&
8159       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
8160     SDValue N00 = N0.getOperand(0).getOperand(0);
8161     SDValue N01 = N0.getOperand(0).getOperand(1);
8162     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8163                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
8164                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8165   }
8166 
8167   // Look through FP_EXTEND nodes to do more combining.
8168   if (AllowFusion && LookThroughFPExt) {
8169     // fold (fsub (fpext (fmul x, y)), z)
8170     //   -> (fma (fpext x), (fpext y), (fneg z))
8171     if (N0.getOpcode() == ISD::FP_EXTEND) {
8172       SDValue N00 = N0.getOperand(0);
8173       if (N00.getOpcode() == ISD::FMUL)
8174         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8175                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8176                                        N00.getOperand(0)),
8177                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8178                                        N00.getOperand(1)),
8179                            DAG.getNode(ISD::FNEG, SL, VT, N1));
8180     }
8181 
8182     // fold (fsub x, (fpext (fmul y, z)))
8183     //   -> (fma (fneg (fpext y)), (fpext z), x)
8184     // Note: Commutes FSUB operands.
8185     if (N1.getOpcode() == ISD::FP_EXTEND) {
8186       SDValue N10 = N1.getOperand(0);
8187       if (N10.getOpcode() == ISD::FMUL)
8188         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8189                            DAG.getNode(ISD::FNEG, SL, VT,
8190                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
8191                                                    N10.getOperand(0))),
8192                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8193                                        N10.getOperand(1)),
8194                            N0);
8195     }
8196 
8197     // fold (fsub (fpext (fneg (fmul, x, y))), z)
8198     //   -> (fneg (fma (fpext x), (fpext y), z))
8199     // Note: This could be removed with appropriate canonicalization of the
8200     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
8201     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
8202     // from implementing the canonicalization in visitFSUB.
8203     if (N0.getOpcode() == ISD::FP_EXTEND) {
8204       SDValue N00 = N0.getOperand(0);
8205       if (N00.getOpcode() == ISD::FNEG) {
8206         SDValue N000 = N00.getOperand(0);
8207         if (N000.getOpcode() == ISD::FMUL) {
8208           return DAG.getNode(ISD::FNEG, SL, VT,
8209                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8210                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8211                                                      N000.getOperand(0)),
8212                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8213                                                      N000.getOperand(1)),
8214                                          N1));
8215         }
8216       }
8217     }
8218 
8219     // fold (fsub (fneg (fpext (fmul, x, y))), z)
8220     //   -> (fneg (fma (fpext x)), (fpext y), z)
8221     // Note: This could be removed with appropriate canonicalization of the
8222     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
8223     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
8224     // from implementing the canonicalization in visitFSUB.
8225     if (N0.getOpcode() == ISD::FNEG) {
8226       SDValue N00 = N0.getOperand(0);
8227       if (N00.getOpcode() == ISD::FP_EXTEND) {
8228         SDValue N000 = N00.getOperand(0);
8229         if (N000.getOpcode() == ISD::FMUL) {
8230           return DAG.getNode(ISD::FNEG, SL, VT,
8231                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8232                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8233                                                      N000.getOperand(0)),
8234                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8235                                                      N000.getOperand(1)),
8236                                          N1));
8237         }
8238       }
8239     }
8240 
8241   }
8242 
8243   // More folding opportunities when target permits.
8244   if ((AllowFusion || HasFMAD) && Aggressive) {
8245     // fold (fsub (fma x, y, (fmul u, v)), z)
8246     //   -> (fma x, y (fma u, v, (fneg z)))
8247     if (N0.getOpcode() == PreferredFusedOpcode &&
8248         N0.getOperand(2).getOpcode() == ISD::FMUL) {
8249       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8250                          N0.getOperand(0), N0.getOperand(1),
8251                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8252                                      N0.getOperand(2).getOperand(0),
8253                                      N0.getOperand(2).getOperand(1),
8254                                      DAG.getNode(ISD::FNEG, SL, VT,
8255                                                  N1)));
8256     }
8257 
8258     // fold (fsub x, (fma y, z, (fmul u, v)))
8259     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
8260     if (N1.getOpcode() == PreferredFusedOpcode &&
8261         N1.getOperand(2).getOpcode() == ISD::FMUL) {
8262       SDValue N20 = N1.getOperand(2).getOperand(0);
8263       SDValue N21 = N1.getOperand(2).getOperand(1);
8264       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8265                          DAG.getNode(ISD::FNEG, SL, VT,
8266                                      N1.getOperand(0)),
8267                          N1.getOperand(1),
8268                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8269                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
8270 
8271                                      N21, N0));
8272     }
8273 
8274     if (AllowFusion && LookThroughFPExt) {
8275       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
8276       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
8277       if (N0.getOpcode() == PreferredFusedOpcode) {
8278         SDValue N02 = N0.getOperand(2);
8279         if (N02.getOpcode() == ISD::FP_EXTEND) {
8280           SDValue N020 = N02.getOperand(0);
8281           if (N020.getOpcode() == ISD::FMUL)
8282             return DAG.getNode(PreferredFusedOpcode, SL, VT,
8283                                N0.getOperand(0), N0.getOperand(1),
8284                                DAG.getNode(PreferredFusedOpcode, SL, VT,
8285                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8286                                                        N020.getOperand(0)),
8287                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8288                                                        N020.getOperand(1)),
8289                                            DAG.getNode(ISD::FNEG, SL, VT,
8290                                                        N1)));
8291         }
8292       }
8293 
8294       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
8295       //   -> (fma (fpext x), (fpext y),
8296       //           (fma (fpext u), (fpext v), (fneg z)))
8297       // FIXME: This turns two single-precision and one double-precision
8298       // operation into two double-precision operations, which might not be
8299       // interesting for all targets, especially GPUs.
8300       if (N0.getOpcode() == ISD::FP_EXTEND) {
8301         SDValue N00 = N0.getOperand(0);
8302         if (N00.getOpcode() == PreferredFusedOpcode) {
8303           SDValue N002 = N00.getOperand(2);
8304           if (N002.getOpcode() == ISD::FMUL)
8305             return DAG.getNode(PreferredFusedOpcode, SL, VT,
8306                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
8307                                            N00.getOperand(0)),
8308                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
8309                                            N00.getOperand(1)),
8310                                DAG.getNode(PreferredFusedOpcode, SL, VT,
8311                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8312                                                        N002.getOperand(0)),
8313                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8314                                                        N002.getOperand(1)),
8315                                            DAG.getNode(ISD::FNEG, SL, VT,
8316                                                        N1)));
8317         }
8318       }
8319 
8320       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
8321       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
8322       if (N1.getOpcode() == PreferredFusedOpcode &&
8323         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
8324         SDValue N120 = N1.getOperand(2).getOperand(0);
8325         if (N120.getOpcode() == ISD::FMUL) {
8326           SDValue N1200 = N120.getOperand(0);
8327           SDValue N1201 = N120.getOperand(1);
8328           return DAG.getNode(PreferredFusedOpcode, SL, VT,
8329                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
8330                              N1.getOperand(1),
8331                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8332                                          DAG.getNode(ISD::FNEG, SL, VT,
8333                                              DAG.getNode(ISD::FP_EXTEND, SL,
8334                                                          VT, N1200)),
8335                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8336                                                      N1201),
8337                                          N0));
8338         }
8339       }
8340 
8341       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
8342       //   -> (fma (fneg (fpext y)), (fpext z),
8343       //           (fma (fneg (fpext u)), (fpext v), x))
8344       // FIXME: This turns two single-precision and one double-precision
8345       // operation into two double-precision operations, which might not be
8346       // interesting for all targets, especially GPUs.
8347       if (N1.getOpcode() == ISD::FP_EXTEND &&
8348         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
8349         SDValue N100 = N1.getOperand(0).getOperand(0);
8350         SDValue N101 = N1.getOperand(0).getOperand(1);
8351         SDValue N102 = N1.getOperand(0).getOperand(2);
8352         if (N102.getOpcode() == ISD::FMUL) {
8353           SDValue N1020 = N102.getOperand(0);
8354           SDValue N1021 = N102.getOperand(1);
8355           return DAG.getNode(PreferredFusedOpcode, SL, VT,
8356                              DAG.getNode(ISD::FNEG, SL, VT,
8357                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8358                                                      N100)),
8359                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
8360                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8361                                          DAG.getNode(ISD::FNEG, SL, VT,
8362                                              DAG.getNode(ISD::FP_EXTEND, SL,
8363                                                          VT, N1020)),
8364                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8365                                                      N1021),
8366                                          N0));
8367         }
8368       }
8369     }
8370   }
8371 
8372   return SDValue();
8373 }
8374 
8375 /// Try to perform FMA combining on a given FMUL node.
8376 SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
8377   SDValue N0 = N->getOperand(0);
8378   SDValue N1 = N->getOperand(1);
8379   EVT VT = N->getValueType(0);
8380   SDLoc SL(N);
8381 
8382   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
8383 
8384   const TargetOptions &Options = DAG.getTarget().Options;
8385   bool AllowFusion =
8386       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8387 
8388   // Floating-point multiply-add with intermediate rounding.
8389   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8390 
8391   // Floating-point multiply-add without intermediate rounding.
8392   bool HasFMA =
8393       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8394       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8395 
8396   // No valid opcode, do not combine.
8397   if (!HasFMAD && !HasFMA)
8398     return SDValue();
8399 
8400   // Always prefer FMAD to FMA for precision.
8401   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8402   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8403 
8404   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
8405   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
8406   auto FuseFADD = [&](SDValue X, SDValue Y) {
8407     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
8408       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8409       if (XC1 && XC1->isExactlyValue(+1.0))
8410         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8411       if (XC1 && XC1->isExactlyValue(-1.0))
8412         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8413                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8414     }
8415     return SDValue();
8416   };
8417 
8418   if (SDValue FMA = FuseFADD(N0, N1))
8419     return FMA;
8420   if (SDValue FMA = FuseFADD(N1, N0))
8421     return FMA;
8422 
8423   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
8424   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
8425   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
8426   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
8427   auto FuseFSUB = [&](SDValue X, SDValue Y) {
8428     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
8429       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
8430       if (XC0 && XC0->isExactlyValue(+1.0))
8431         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8432                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8433                            Y);
8434       if (XC0 && XC0->isExactlyValue(-1.0))
8435         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8436                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8437                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8438 
8439       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8440       if (XC1 && XC1->isExactlyValue(+1.0))
8441         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8442                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8443       if (XC1 && XC1->isExactlyValue(-1.0))
8444         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8445     }
8446     return SDValue();
8447   };
8448 
8449   if (SDValue FMA = FuseFSUB(N0, N1))
8450     return FMA;
8451   if (SDValue FMA = FuseFSUB(N1, N0))
8452     return FMA;
8453 
8454   return SDValue();
8455 }
8456 
8457 SDValue DAGCombiner::visitFADD(SDNode *N) {
8458   SDValue N0 = N->getOperand(0);
8459   SDValue N1 = N->getOperand(1);
8460   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
8461   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
8462   EVT VT = N->getValueType(0);
8463   SDLoc DL(N);
8464   const TargetOptions &Options = DAG.getTarget().Options;
8465   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8466 
8467   // fold vector ops
8468   if (VT.isVector())
8469     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8470       return FoldedVOp;
8471 
8472   // fold (fadd c1, c2) -> c1 + c2
8473   if (N0CFP && N1CFP)
8474     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
8475 
8476   // canonicalize constant to RHS
8477   if (N0CFP && !N1CFP)
8478     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
8479 
8480   // fold (fadd A, (fneg B)) -> (fsub A, B)
8481   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
8482       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
8483     return DAG.getNode(ISD::FSUB, DL, VT, N0,
8484                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8485 
8486   // fold (fadd (fneg A), B) -> (fsub B, A)
8487   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
8488       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
8489     return DAG.getNode(ISD::FSUB, DL, VT, N1,
8490                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
8491 
8492   // If 'unsafe math' is enabled, fold lots of things.
8493   if (Options.UnsafeFPMath) {
8494     // No FP constant should be created after legalization as Instruction
8495     // Selection pass has a hard time dealing with FP constants.
8496     bool AllowNewConst = (Level < AfterLegalizeDAG);
8497 
8498     // fold (fadd A, 0) -> A
8499     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
8500       if (N1C->isZero())
8501         return N0;
8502 
8503     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
8504     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
8505         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
8506       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
8507                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
8508                                      Flags),
8509                          Flags);
8510 
8511     // If allowed, fold (fadd (fneg x), x) -> 0.0
8512     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
8513       return DAG.getConstantFP(0.0, DL, VT);
8514 
8515     // If allowed, fold (fadd x, (fneg x)) -> 0.0
8516     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
8517       return DAG.getConstantFP(0.0, DL, VT);
8518 
8519     // We can fold chains of FADD's of the same value into multiplications.
8520     // This transform is not safe in general because we are reducing the number
8521     // of rounding steps.
8522     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
8523       if (N0.getOpcode() == ISD::FMUL) {
8524         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
8525         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
8526 
8527         // (fadd (fmul x, c), x) -> (fmul x, c+1)
8528         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
8529           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
8530                                        DAG.getConstantFP(1.0, DL, VT), Flags);
8531           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
8532         }
8533 
8534         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
8535         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
8536             N1.getOperand(0) == N1.getOperand(1) &&
8537             N0.getOperand(0) == N1.getOperand(0)) {
8538           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
8539                                        DAG.getConstantFP(2.0, DL, VT), Flags);
8540           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
8541         }
8542       }
8543 
8544       if (N1.getOpcode() == ISD::FMUL) {
8545         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
8546         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
8547 
8548         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
8549         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
8550           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
8551                                        DAG.getConstantFP(1.0, DL, VT), Flags);
8552           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
8553         }
8554 
8555         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
8556         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
8557             N0.getOperand(0) == N0.getOperand(1) &&
8558             N1.getOperand(0) == N0.getOperand(0)) {
8559           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
8560                                        DAG.getConstantFP(2.0, DL, VT), Flags);
8561           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
8562         }
8563       }
8564 
8565       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
8566         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
8567         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
8568         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
8569             (N0.getOperand(0) == N1)) {
8570           return DAG.getNode(ISD::FMUL, DL, VT,
8571                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
8572         }
8573       }
8574 
8575       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
8576         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
8577         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
8578         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
8579             N1.getOperand(0) == N0) {
8580           return DAG.getNode(ISD::FMUL, DL, VT,
8581                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
8582         }
8583       }
8584 
8585       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
8586       if (AllowNewConst &&
8587           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
8588           N0.getOperand(0) == N0.getOperand(1) &&
8589           N1.getOperand(0) == N1.getOperand(1) &&
8590           N0.getOperand(0) == N1.getOperand(0)) {
8591         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
8592                            DAG.getConstantFP(4.0, DL, VT), Flags);
8593       }
8594     }
8595   } // enable-unsafe-fp-math
8596 
8597   // FADD -> FMA combines:
8598   if (SDValue Fused = visitFADDForFMACombine(N)) {
8599     AddToWorklist(Fused.getNode());
8600     return Fused;
8601   }
8602   return SDValue();
8603 }
8604 
8605 SDValue DAGCombiner::visitFSUB(SDNode *N) {
8606   SDValue N0 = N->getOperand(0);
8607   SDValue N1 = N->getOperand(1);
8608   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
8609   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8610   EVT VT = N->getValueType(0);
8611   SDLoc DL(N);
8612   const TargetOptions &Options = DAG.getTarget().Options;
8613   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8614 
8615   // fold vector ops
8616   if (VT.isVector())
8617     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8618       return FoldedVOp;
8619 
8620   // fold (fsub c1, c2) -> c1-c2
8621   if (N0CFP && N1CFP)
8622     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
8623 
8624   // fold (fsub A, (fneg B)) -> (fadd A, B)
8625   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8626     return DAG.getNode(ISD::FADD, DL, VT, N0,
8627                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8628 
8629   // If 'unsafe math' is enabled, fold lots of things.
8630   if (Options.UnsafeFPMath) {
8631     // (fsub A, 0) -> A
8632     if (N1CFP && N1CFP->isZero())
8633       return N0;
8634 
8635     // (fsub 0, B) -> -B
8636     if (N0CFP && N0CFP->isZero()) {
8637       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8638         return GetNegatedExpression(N1, DAG, LegalOperations);
8639       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8640         return DAG.getNode(ISD::FNEG, DL, VT, N1);
8641     }
8642 
8643     // (fsub x, x) -> 0.0
8644     if (N0 == N1)
8645       return DAG.getConstantFP(0.0f, DL, VT);
8646 
8647     // (fsub x, (fadd x, y)) -> (fneg y)
8648     // (fsub x, (fadd y, x)) -> (fneg y)
8649     if (N1.getOpcode() == ISD::FADD) {
8650       SDValue N10 = N1->getOperand(0);
8651       SDValue N11 = N1->getOperand(1);
8652 
8653       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
8654         return GetNegatedExpression(N11, DAG, LegalOperations);
8655 
8656       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
8657         return GetNegatedExpression(N10, DAG, LegalOperations);
8658     }
8659   }
8660 
8661   // FSUB -> FMA combines:
8662   if (SDValue Fused = visitFSUBForFMACombine(N)) {
8663     AddToWorklist(Fused.getNode());
8664     return Fused;
8665   }
8666 
8667   return SDValue();
8668 }
8669 
8670 SDValue DAGCombiner::visitFMUL(SDNode *N) {
8671   SDValue N0 = N->getOperand(0);
8672   SDValue N1 = N->getOperand(1);
8673   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
8674   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8675   EVT VT = N->getValueType(0);
8676   SDLoc DL(N);
8677   const TargetOptions &Options = DAG.getTarget().Options;
8678   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8679 
8680   // fold vector ops
8681   if (VT.isVector()) {
8682     // This just handles C1 * C2 for vectors. Other vector folds are below.
8683     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8684       return FoldedVOp;
8685   }
8686 
8687   // fold (fmul c1, c2) -> c1*c2
8688   if (N0CFP && N1CFP)
8689     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
8690 
8691   // canonicalize constant to RHS
8692   if (isConstantFPBuildVectorOrConstantFP(N0) &&
8693      !isConstantFPBuildVectorOrConstantFP(N1))
8694     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
8695 
8696   // fold (fmul A, 1.0) -> A
8697   if (N1CFP && N1CFP->isExactlyValue(1.0))
8698     return N0;
8699 
8700   if (Options.UnsafeFPMath) {
8701     // fold (fmul A, 0) -> 0
8702     if (N1CFP && N1CFP->isZero())
8703       return N1;
8704 
8705     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
8706     if (N0.getOpcode() == ISD::FMUL) {
8707       // Fold scalars or any vector constants (not just splats).
8708       // This fold is done in general by InstCombine, but extra fmul insts
8709       // may have been generated during lowering.
8710       SDValue N00 = N0.getOperand(0);
8711       SDValue N01 = N0.getOperand(1);
8712       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
8713       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
8714       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
8715 
8716       // Check 1: Make sure that the first operand of the inner multiply is NOT
8717       // a constant. Otherwise, we may induce infinite looping.
8718       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
8719         // Check 2: Make sure that the second operand of the inner multiply and
8720         // the second operand of the outer multiply are constants.
8721         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
8722             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
8723           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
8724           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
8725         }
8726       }
8727     }
8728 
8729     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
8730     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
8731     // during an early run of DAGCombiner can prevent folding with fmuls
8732     // inserted during lowering.
8733     if (N0.getOpcode() == ISD::FADD &&
8734         (N0.getOperand(0) == N0.getOperand(1)) &&
8735         N0.hasOneUse()) {
8736       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
8737       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
8738       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
8739     }
8740   }
8741 
8742   // fold (fmul X, 2.0) -> (fadd X, X)
8743   if (N1CFP && N1CFP->isExactlyValue(+2.0))
8744     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
8745 
8746   // fold (fmul X, -1.0) -> (fneg X)
8747   if (N1CFP && N1CFP->isExactlyValue(-1.0))
8748     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8749       return DAG.getNode(ISD::FNEG, DL, VT, N0);
8750 
8751   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
8752   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
8753     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
8754       // Both can be negated for free, check to see if at least one is cheaper
8755       // negated.
8756       if (LHSNeg == 2 || RHSNeg == 2)
8757         return DAG.getNode(ISD::FMUL, DL, VT,
8758                            GetNegatedExpression(N0, DAG, LegalOperations),
8759                            GetNegatedExpression(N1, DAG, LegalOperations),
8760                            Flags);
8761     }
8762   }
8763 
8764   // FMUL -> FMA combines:
8765   if (SDValue Fused = visitFMULForFMACombine(N)) {
8766     AddToWorklist(Fused.getNode());
8767     return Fused;
8768   }
8769 
8770   return SDValue();
8771 }
8772 
8773 SDValue DAGCombiner::visitFMA(SDNode *N) {
8774   SDValue N0 = N->getOperand(0);
8775   SDValue N1 = N->getOperand(1);
8776   SDValue N2 = N->getOperand(2);
8777   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8778   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8779   EVT VT = N->getValueType(0);
8780   SDLoc DL(N);
8781   const TargetOptions &Options = DAG.getTarget().Options;
8782 
8783   // Constant fold FMA.
8784   if (isa<ConstantFPSDNode>(N0) &&
8785       isa<ConstantFPSDNode>(N1) &&
8786       isa<ConstantFPSDNode>(N2)) {
8787     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
8788   }
8789 
8790   if (Options.UnsafeFPMath) {
8791     if (N0CFP && N0CFP->isZero())
8792       return N2;
8793     if (N1CFP && N1CFP->isZero())
8794       return N2;
8795   }
8796   // TODO: The FMA node should have flags that propagate to these nodes.
8797   if (N0CFP && N0CFP->isExactlyValue(1.0))
8798     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
8799   if (N1CFP && N1CFP->isExactlyValue(1.0))
8800     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
8801 
8802   // Canonicalize (fma c, x, y) -> (fma x, c, y)
8803   if (isConstantFPBuildVectorOrConstantFP(N0) &&
8804      !isConstantFPBuildVectorOrConstantFP(N1))
8805     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
8806 
8807   // TODO: FMA nodes should have flags that propagate to the created nodes.
8808   // For now, create a Flags object for use with all unsafe math transforms.
8809   SDNodeFlags Flags;
8810   Flags.setUnsafeAlgebra(true);
8811 
8812   if (Options.UnsafeFPMath) {
8813     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
8814     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
8815         isConstantFPBuildVectorOrConstantFP(N1) &&
8816         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
8817       return DAG.getNode(ISD::FMUL, DL, VT, N0,
8818                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
8819                                      &Flags), &Flags);
8820     }
8821 
8822     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
8823     if (N0.getOpcode() == ISD::FMUL &&
8824         isConstantFPBuildVectorOrConstantFP(N1) &&
8825         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
8826       return DAG.getNode(ISD::FMA, DL, VT,
8827                          N0.getOperand(0),
8828                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
8829                                      &Flags),
8830                          N2);
8831     }
8832   }
8833 
8834   // (fma x, 1, y) -> (fadd x, y)
8835   // (fma x, -1, y) -> (fadd (fneg x), y)
8836   if (N1CFP) {
8837     if (N1CFP->isExactlyValue(1.0))
8838       // TODO: The FMA node should have flags that propagate to this node.
8839       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
8840 
8841     if (N1CFP->isExactlyValue(-1.0) &&
8842         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
8843       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
8844       AddToWorklist(RHSNeg.getNode());
8845       // TODO: The FMA node should have flags that propagate to this node.
8846       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
8847     }
8848   }
8849 
8850   if (Options.UnsafeFPMath) {
8851     // (fma x, c, x) -> (fmul x, (c+1))
8852     if (N1CFP && N0 == N2) {
8853       return DAG.getNode(ISD::FMUL, DL, VT, N0,
8854                          DAG.getNode(ISD::FADD, DL, VT, N1,
8855                                      DAG.getConstantFP(1.0, DL, VT), &Flags),
8856                          &Flags);
8857     }
8858 
8859     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
8860     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
8861       return DAG.getNode(ISD::FMUL, DL, VT, N0,
8862                          DAG.getNode(ISD::FADD, DL, VT, N1,
8863                                      DAG.getConstantFP(-1.0, DL, VT), &Flags),
8864                          &Flags);
8865     }
8866   }
8867 
8868   return SDValue();
8869 }
8870 
8871 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
8872 // reciprocal.
8873 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
8874 // Notice that this is not always beneficial. One reason is different target
8875 // may have different costs for FDIV and FMUL, so sometimes the cost of two
8876 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
8877 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
8878 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
8879   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
8880   const SDNodeFlags *Flags = N->getFlags();
8881   if (!UnsafeMath && !Flags->hasAllowReciprocal())
8882     return SDValue();
8883 
8884   // Skip if current node is a reciprocal.
8885   SDValue N0 = N->getOperand(0);
8886   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8887   if (N0CFP && N0CFP->isExactlyValue(1.0))
8888     return SDValue();
8889 
8890   // Exit early if the target does not want this transform or if there can't
8891   // possibly be enough uses of the divisor to make the transform worthwhile.
8892   SDValue N1 = N->getOperand(1);
8893   unsigned MinUses = TLI.combineRepeatedFPDivisors();
8894   if (!MinUses || N1->use_size() < MinUses)
8895     return SDValue();
8896 
8897   // Find all FDIV users of the same divisor.
8898   // Use a set because duplicates may be present in the user list.
8899   SetVector<SDNode *> Users;
8900   for (auto *U : N1->uses()) {
8901     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
8902       // This division is eligible for optimization only if global unsafe math
8903       // is enabled or if this division allows reciprocal formation.
8904       if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
8905         Users.insert(U);
8906     }
8907   }
8908 
8909   // Now that we have the actual number of divisor uses, make sure it meets
8910   // the minimum threshold specified by the target.
8911   if (Users.size() < MinUses)
8912     return SDValue();
8913 
8914   EVT VT = N->getValueType(0);
8915   SDLoc DL(N);
8916   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
8917   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
8918 
8919   // Dividend / Divisor -> Dividend * Reciprocal
8920   for (auto *U : Users) {
8921     SDValue Dividend = U->getOperand(0);
8922     if (Dividend != FPOne) {
8923       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
8924                                     Reciprocal, Flags);
8925       CombineTo(U, NewNode);
8926     } else if (U != Reciprocal.getNode()) {
8927       // In the absence of fast-math-flags, this user node is always the
8928       // same node as Reciprocal, but with FMF they may be different nodes.
8929       CombineTo(U, Reciprocal);
8930     }
8931   }
8932   return SDValue(N, 0);  // N was replaced.
8933 }
8934 
8935 SDValue DAGCombiner::visitFDIV(SDNode *N) {
8936   SDValue N0 = N->getOperand(0);
8937   SDValue N1 = N->getOperand(1);
8938   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8939   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8940   EVT VT = N->getValueType(0);
8941   SDLoc DL(N);
8942   const TargetOptions &Options = DAG.getTarget().Options;
8943   SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8944 
8945   // fold vector ops
8946   if (VT.isVector())
8947     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8948       return FoldedVOp;
8949 
8950   // fold (fdiv c1, c2) -> c1/c2
8951   if (N0CFP && N1CFP)
8952     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
8953 
8954   if (Options.UnsafeFPMath) {
8955     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
8956     if (N1CFP) {
8957       // Compute the reciprocal 1.0 / c2.
8958       const APFloat &N1APF = N1CFP->getValueAPF();
8959       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
8960       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
8961       // Only do the transform if the reciprocal is a legal fp immediate that
8962       // isn't too nasty (eg NaN, denormal, ...).
8963       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
8964           (!LegalOperations ||
8965            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
8966            // backend)... we should handle this gracefully after Legalize.
8967            // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
8968            TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
8969            TLI.isFPImmLegal(Recip, VT)))
8970         return DAG.getNode(ISD::FMUL, DL, VT, N0,
8971                            DAG.getConstantFP(Recip, DL, VT), Flags);
8972     }
8973 
8974     // If this FDIV is part of a reciprocal square root, it may be folded
8975     // into a target-specific square root estimate instruction.
8976     if (N1.getOpcode() == ISD::FSQRT) {
8977       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
8978         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8979       }
8980     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
8981                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8982       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
8983                                           Flags)) {
8984         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
8985         AddToWorklist(RV.getNode());
8986         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8987       }
8988     } else if (N1.getOpcode() == ISD::FP_ROUND &&
8989                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8990       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
8991                                           Flags)) {
8992         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
8993         AddToWorklist(RV.getNode());
8994         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
8995       }
8996     } else if (N1.getOpcode() == ISD::FMUL) {
8997       // Look through an FMUL. Even though this won't remove the FDIV directly,
8998       // it's still worthwhile to get rid of the FSQRT if possible.
8999       SDValue SqrtOp;
9000       SDValue OtherOp;
9001       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9002         SqrtOp = N1.getOperand(0);
9003         OtherOp = N1.getOperand(1);
9004       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
9005         SqrtOp = N1.getOperand(1);
9006         OtherOp = N1.getOperand(0);
9007       }
9008       if (SqrtOp.getNode()) {
9009         // We found a FSQRT, so try to make this fold:
9010         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
9011         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
9012           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
9013           AddToWorklist(RV.getNode());
9014           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9015         }
9016       }
9017     }
9018 
9019     // Fold into a reciprocal estimate and multiply instead of a real divide.
9020     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
9021       AddToWorklist(RV.getNode());
9022       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9023     }
9024   }
9025 
9026   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
9027   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9028     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9029       // Both can be negated for free, check to see if at least one is cheaper
9030       // negated.
9031       if (LHSNeg == 2 || RHSNeg == 2)
9032         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
9033                            GetNegatedExpression(N0, DAG, LegalOperations),
9034                            GetNegatedExpression(N1, DAG, LegalOperations),
9035                            Flags);
9036     }
9037   }
9038 
9039   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
9040     return CombineRepeatedDivisors;
9041 
9042   return SDValue();
9043 }
9044 
9045 SDValue DAGCombiner::visitFREM(SDNode *N) {
9046   SDValue N0 = N->getOperand(0);
9047   SDValue N1 = N->getOperand(1);
9048   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9049   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9050   EVT VT = N->getValueType(0);
9051 
9052   // fold (frem c1, c2) -> fmod(c1,c2)
9053   if (N0CFP && N1CFP)
9054     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
9055                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
9056 
9057   return SDValue();
9058 }
9059 
9060 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
9061   if (!DAG.getTarget().Options.UnsafeFPMath)
9062     return SDValue();
9063 
9064   SDValue N0 = N->getOperand(0);
9065   if (TLI.isFsqrtCheap(N0, DAG))
9066     return SDValue();
9067 
9068   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
9069   // For now, create a Flags object for use with all unsafe math transforms.
9070   SDNodeFlags Flags;
9071   Flags.setUnsafeAlgebra(true);
9072   return buildSqrtEstimate(N0, &Flags);
9073 }
9074 
9075 /// copysign(x, fp_extend(y)) -> copysign(x, y)
9076 /// copysign(x, fp_round(y)) -> copysign(x, y)
9077 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
9078   SDValue N1 = N->getOperand(1);
9079   if ((N1.getOpcode() == ISD::FP_EXTEND ||
9080        N1.getOpcode() == ISD::FP_ROUND)) {
9081     // Do not optimize out type conversion of f128 type yet.
9082     // For some targets like x86_64, configuration is changed to keep one f128
9083     // value in one SSE register, but instruction selection cannot handle
9084     // FCOPYSIGN on SSE registers yet.
9085     EVT N1VT = N1->getValueType(0);
9086     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
9087     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
9088   }
9089   return false;
9090 }
9091 
9092 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
9093   SDValue N0 = N->getOperand(0);
9094   SDValue N1 = N->getOperand(1);
9095   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9096   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9097   EVT VT = N->getValueType(0);
9098 
9099   if (N0CFP && N1CFP) // Constant fold
9100     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
9101 
9102   if (N1CFP) {
9103     const APFloat &V = N1CFP->getValueAPF();
9104     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
9105     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
9106     if (!V.isNegative()) {
9107       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
9108         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9109     } else {
9110       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9111         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9112                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
9113     }
9114   }
9115 
9116   // copysign(fabs(x), y) -> copysign(x, y)
9117   // copysign(fneg(x), y) -> copysign(x, y)
9118   // copysign(copysign(x,z), y) -> copysign(x, y)
9119   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
9120       N0.getOpcode() == ISD::FCOPYSIGN)
9121     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
9122 
9123   // copysign(x, abs(y)) -> abs(x)
9124   if (N1.getOpcode() == ISD::FABS)
9125     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9126 
9127   // copysign(x, copysign(y,z)) -> copysign(x, z)
9128   if (N1.getOpcode() == ISD::FCOPYSIGN)
9129     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
9130 
9131   // copysign(x, fp_extend(y)) -> copysign(x, y)
9132   // copysign(x, fp_round(y)) -> copysign(x, y)
9133   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
9134     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
9135 
9136   return SDValue();
9137 }
9138 
9139 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
9140   SDValue N0 = N->getOperand(0);
9141   EVT VT = N->getValueType(0);
9142   EVT OpVT = N0.getValueType();
9143 
9144   // fold (sint_to_fp c1) -> c1fp
9145   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9146       // ...but only if the target supports immediate floating-point values
9147       (!LegalOperations ||
9148        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9149     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9150 
9151   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
9152   // but UINT_TO_FP is legal on this target, try to convert.
9153   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
9154       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
9155     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
9156     if (DAG.SignBitIsZero(N0))
9157       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9158   }
9159 
9160   // The next optimizations are desirable only if SELECT_CC can be lowered.
9161   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9162     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
9163     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
9164         !VT.isVector() &&
9165         (!LegalOperations ||
9166          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9167       SDLoc DL(N);
9168       SDValue Ops[] =
9169         { N0.getOperand(0), N0.getOperand(1),
9170           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9171           N0.getOperand(2) };
9172       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9173     }
9174 
9175     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
9176     //      (select_cc x, y, 1.0, 0.0,, cc)
9177     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
9178         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
9179         (!LegalOperations ||
9180          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9181       SDLoc DL(N);
9182       SDValue Ops[] =
9183         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
9184           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9185           N0.getOperand(0).getOperand(2) };
9186       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9187     }
9188   }
9189 
9190   return SDValue();
9191 }
9192 
9193 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
9194   SDValue N0 = N->getOperand(0);
9195   EVT VT = N->getValueType(0);
9196   EVT OpVT = N0.getValueType();
9197 
9198   // fold (uint_to_fp c1) -> c1fp
9199   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9200       // ...but only if the target supports immediate floating-point values
9201       (!LegalOperations ||
9202        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9203     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9204 
9205   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
9206   // but SINT_TO_FP is legal on this target, try to convert.
9207   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
9208       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
9209     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
9210     if (DAG.SignBitIsZero(N0))
9211       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9212   }
9213 
9214   // The next optimizations are desirable only if SELECT_CC can be lowered.
9215   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9216     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
9217 
9218     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
9219         (!LegalOperations ||
9220          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9221       SDLoc DL(N);
9222       SDValue Ops[] =
9223         { N0.getOperand(0), N0.getOperand(1),
9224           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9225           N0.getOperand(2) };
9226       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9227     }
9228   }
9229 
9230   return SDValue();
9231 }
9232 
9233 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
9234 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
9235   SDValue N0 = N->getOperand(0);
9236   EVT VT = N->getValueType(0);
9237 
9238   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
9239     return SDValue();
9240 
9241   SDValue Src = N0.getOperand(0);
9242   EVT SrcVT = Src.getValueType();
9243   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
9244   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
9245 
9246   // We can safely assume the conversion won't overflow the output range,
9247   // because (for example) (uint8_t)18293.f is undefined behavior.
9248 
9249   // Since we can assume the conversion won't overflow, our decision as to
9250   // whether the input will fit in the float should depend on the minimum
9251   // of the input range and output range.
9252 
9253   // This means this is also safe for a signed input and unsigned output, since
9254   // a negative input would lead to undefined behavior.
9255   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
9256   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
9257   unsigned ActualSize = std::min(InputSize, OutputSize);
9258   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
9259 
9260   // We can only fold away the float conversion if the input range can be
9261   // represented exactly in the float range.
9262   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
9263     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
9264       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
9265                                                        : ISD::ZERO_EXTEND;
9266       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
9267     }
9268     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
9269       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
9270     return DAG.getBitcast(VT, Src);
9271   }
9272   return SDValue();
9273 }
9274 
9275 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
9276   SDValue N0 = N->getOperand(0);
9277   EVT VT = N->getValueType(0);
9278 
9279   // fold (fp_to_sint c1fp) -> c1
9280   if (isConstantFPBuildVectorOrConstantFP(N0))
9281     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
9282 
9283   return FoldIntToFPToInt(N, DAG);
9284 }
9285 
9286 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
9287   SDValue N0 = N->getOperand(0);
9288   EVT VT = N->getValueType(0);
9289 
9290   // fold (fp_to_uint c1fp) -> c1
9291   if (isConstantFPBuildVectorOrConstantFP(N0))
9292     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
9293 
9294   return FoldIntToFPToInt(N, DAG);
9295 }
9296 
9297 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
9298   SDValue N0 = N->getOperand(0);
9299   SDValue N1 = N->getOperand(1);
9300   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9301   EVT VT = N->getValueType(0);
9302 
9303   // fold (fp_round c1fp) -> c1fp
9304   if (N0CFP)
9305     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
9306 
9307   // fold (fp_round (fp_extend x)) -> x
9308   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
9309     return N0.getOperand(0);
9310 
9311   // fold (fp_round (fp_round x)) -> (fp_round x)
9312   if (N0.getOpcode() == ISD::FP_ROUND) {
9313     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
9314     const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
9315 
9316     // Skip this folding if it results in an fp_round from f80 to f16.
9317     //
9318     // f80 to f16 always generates an expensive (and as yet, unimplemented)
9319     // libcall to __truncxfhf2 instead of selecting native f16 conversion
9320     // instructions from f32 or f64.  Moreover, the first (value-preserving)
9321     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
9322     // x86.
9323     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
9324       return SDValue();
9325 
9326     // If the first fp_round isn't a value preserving truncation, it might
9327     // introduce a tie in the second fp_round, that wouldn't occur in the
9328     // single-step fp_round we want to fold to.
9329     // In other words, double rounding isn't the same as rounding.
9330     // Also, this is a value preserving truncation iff both fp_round's are.
9331     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
9332       SDLoc DL(N);
9333       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
9334                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
9335     }
9336   }
9337 
9338   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
9339   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
9340     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
9341                               N0.getOperand(0), N1);
9342     AddToWorklist(Tmp.getNode());
9343     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
9344                        Tmp, N0.getOperand(1));
9345   }
9346 
9347   return SDValue();
9348 }
9349 
9350 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
9351   SDValue N0 = N->getOperand(0);
9352   EVT VT = N->getValueType(0);
9353   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9354   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9355 
9356   // fold (fp_round_inreg c1fp) -> c1fp
9357   if (N0CFP && isTypeLegal(EVT)) {
9358     SDLoc DL(N);
9359     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
9360     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
9361   }
9362 
9363   return SDValue();
9364 }
9365 
9366 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
9367   SDValue N0 = N->getOperand(0);
9368   EVT VT = N->getValueType(0);
9369 
9370   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
9371   if (N->hasOneUse() &&
9372       N->use_begin()->getOpcode() == ISD::FP_ROUND)
9373     return SDValue();
9374 
9375   // fold (fp_extend c1fp) -> c1fp
9376   if (isConstantFPBuildVectorOrConstantFP(N0))
9377     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
9378 
9379   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
9380   if (N0.getOpcode() == ISD::FP16_TO_FP &&
9381       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
9382     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
9383 
9384   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
9385   // value of X.
9386   if (N0.getOpcode() == ISD::FP_ROUND
9387       && N0.getNode()->getConstantOperandVal(1) == 1) {
9388     SDValue In = N0.getOperand(0);
9389     if (In.getValueType() == VT) return In;
9390     if (VT.bitsLT(In.getValueType()))
9391       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
9392                          In, N0.getOperand(1));
9393     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
9394   }
9395 
9396   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
9397   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9398        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9399     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9400     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9401                                      LN0->getChain(),
9402                                      LN0->getBasePtr(), N0.getValueType(),
9403                                      LN0->getMemOperand());
9404     CombineTo(N, ExtLoad);
9405     CombineTo(N0.getNode(),
9406               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
9407                           N0.getValueType(), ExtLoad,
9408                           DAG.getIntPtrConstant(1, SDLoc(N0))),
9409               ExtLoad.getValue(1));
9410     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9411   }
9412 
9413   return SDValue();
9414 }
9415 
9416 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
9417   SDValue N0 = N->getOperand(0);
9418   EVT VT = N->getValueType(0);
9419 
9420   // fold (fceil c1) -> fceil(c1)
9421   if (isConstantFPBuildVectorOrConstantFP(N0))
9422     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
9423 
9424   return SDValue();
9425 }
9426 
9427 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
9428   SDValue N0 = N->getOperand(0);
9429   EVT VT = N->getValueType(0);
9430 
9431   // fold (ftrunc c1) -> ftrunc(c1)
9432   if (isConstantFPBuildVectorOrConstantFP(N0))
9433     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
9434 
9435   return SDValue();
9436 }
9437 
9438 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
9439   SDValue N0 = N->getOperand(0);
9440   EVT VT = N->getValueType(0);
9441 
9442   // fold (ffloor c1) -> ffloor(c1)
9443   if (isConstantFPBuildVectorOrConstantFP(N0))
9444     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
9445 
9446   return SDValue();
9447 }
9448 
9449 // FIXME: FNEG and FABS have a lot in common; refactor.
9450 SDValue DAGCombiner::visitFNEG(SDNode *N) {
9451   SDValue N0 = N->getOperand(0);
9452   EVT VT = N->getValueType(0);
9453 
9454   // Constant fold FNEG.
9455   if (isConstantFPBuildVectorOrConstantFP(N0))
9456     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
9457 
9458   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
9459                          &DAG.getTarget().Options))
9460     return GetNegatedExpression(N0, DAG, LegalOperations);
9461 
9462   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
9463   // constant pool values.
9464   if (!TLI.isFNegFree(VT) &&
9465       N0.getOpcode() == ISD::BITCAST &&
9466       N0.getNode()->hasOneUse()) {
9467     SDValue Int = N0.getOperand(0);
9468     EVT IntVT = Int.getValueType();
9469     if (IntVT.isInteger() && !IntVT.isVector()) {
9470       APInt SignMask;
9471       if (N0.getValueType().isVector()) {
9472         // For a vector, get a mask such as 0x80... per scalar element
9473         // and splat it.
9474         SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
9475         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
9476       } else {
9477         // For a scalar, just generate 0x80...
9478         SignMask = APInt::getSignBit(IntVT.getSizeInBits());
9479       }
9480       SDLoc DL0(N0);
9481       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
9482                         DAG.getConstant(SignMask, DL0, IntVT));
9483       AddToWorklist(Int.getNode());
9484       return DAG.getBitcast(VT, Int);
9485     }
9486   }
9487 
9488   // (fneg (fmul c, x)) -> (fmul -c, x)
9489   if (N0.getOpcode() == ISD::FMUL &&
9490       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
9491     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
9492     if (CFP1) {
9493       APFloat CVal = CFP1->getValueAPF();
9494       CVal.changeSign();
9495       if (Level >= AfterLegalizeDAG &&
9496           (TLI.isFPImmLegal(CVal, VT) ||
9497            TLI.isOperationLegal(ISD::ConstantFP, VT)))
9498         return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
9499                            DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9500                                        N0.getOperand(1)),
9501                            &cast<BinaryWithFlagsSDNode>(N0)->Flags);
9502     }
9503   }
9504 
9505   return SDValue();
9506 }
9507 
9508 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
9509   SDValue N0 = N->getOperand(0);
9510   SDValue N1 = N->getOperand(1);
9511   EVT VT = N->getValueType(0);
9512   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9513   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9514 
9515   if (N0CFP && N1CFP) {
9516     const APFloat &C0 = N0CFP->getValueAPF();
9517     const APFloat &C1 = N1CFP->getValueAPF();
9518     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
9519   }
9520 
9521   // Canonicalize to constant on RHS.
9522   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9523      !isConstantFPBuildVectorOrConstantFP(N1))
9524     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
9525 
9526   return SDValue();
9527 }
9528 
9529 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
9530   SDValue N0 = N->getOperand(0);
9531   SDValue N1 = N->getOperand(1);
9532   EVT VT = N->getValueType(0);
9533   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9534   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9535 
9536   if (N0CFP && N1CFP) {
9537     const APFloat &C0 = N0CFP->getValueAPF();
9538     const APFloat &C1 = N1CFP->getValueAPF();
9539     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
9540   }
9541 
9542   // Canonicalize to constant on RHS.
9543   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9544      !isConstantFPBuildVectorOrConstantFP(N1))
9545     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
9546 
9547   return SDValue();
9548 }
9549 
9550 SDValue DAGCombiner::visitFABS(SDNode *N) {
9551   SDValue N0 = N->getOperand(0);
9552   EVT VT = N->getValueType(0);
9553 
9554   // fold (fabs c1) -> fabs(c1)
9555   if (isConstantFPBuildVectorOrConstantFP(N0))
9556     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9557 
9558   // fold (fabs (fabs x)) -> (fabs x)
9559   if (N0.getOpcode() == ISD::FABS)
9560     return N->getOperand(0);
9561 
9562   // fold (fabs (fneg x)) -> (fabs x)
9563   // fold (fabs (fcopysign x, y)) -> (fabs x)
9564   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
9565     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
9566 
9567   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
9568   // constant pool values.
9569   if (!TLI.isFAbsFree(VT) &&
9570       N0.getOpcode() == ISD::BITCAST &&
9571       N0.getNode()->hasOneUse()) {
9572     SDValue Int = N0.getOperand(0);
9573     EVT IntVT = Int.getValueType();
9574     if (IntVT.isInteger() && !IntVT.isVector()) {
9575       APInt SignMask;
9576       if (N0.getValueType().isVector()) {
9577         // For a vector, get a mask such as 0x7f... per scalar element
9578         // and splat it.
9579         SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
9580         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
9581       } else {
9582         // For a scalar, just generate 0x7f...
9583         SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
9584       }
9585       SDLoc DL(N0);
9586       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
9587                         DAG.getConstant(SignMask, DL, IntVT));
9588       AddToWorklist(Int.getNode());
9589       return DAG.getBitcast(N->getValueType(0), Int);
9590     }
9591   }
9592 
9593   return SDValue();
9594 }
9595 
9596 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
9597   SDValue Chain = N->getOperand(0);
9598   SDValue N1 = N->getOperand(1);
9599   SDValue N2 = N->getOperand(2);
9600 
9601   // If N is a constant we could fold this into a fallthrough or unconditional
9602   // branch. However that doesn't happen very often in normal code, because
9603   // Instcombine/SimplifyCFG should have handled the available opportunities.
9604   // If we did this folding here, it would be necessary to update the
9605   // MachineBasicBlock CFG, which is awkward.
9606 
9607   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
9608   // on the target.
9609   if (N1.getOpcode() == ISD::SETCC &&
9610       TLI.isOperationLegalOrCustom(ISD::BR_CC,
9611                                    N1.getOperand(0).getValueType())) {
9612     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9613                        Chain, N1.getOperand(2),
9614                        N1.getOperand(0), N1.getOperand(1), N2);
9615   }
9616 
9617   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
9618       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
9619        (N1.getOperand(0).hasOneUse() &&
9620         N1.getOperand(0).getOpcode() == ISD::SRL))) {
9621     SDNode *Trunc = nullptr;
9622     if (N1.getOpcode() == ISD::TRUNCATE) {
9623       // Look pass the truncate.
9624       Trunc = N1.getNode();
9625       N1 = N1.getOperand(0);
9626     }
9627 
9628     // Match this pattern so that we can generate simpler code:
9629     //
9630     //   %a = ...
9631     //   %b = and i32 %a, 2
9632     //   %c = srl i32 %b, 1
9633     //   brcond i32 %c ...
9634     //
9635     // into
9636     //
9637     //   %a = ...
9638     //   %b = and i32 %a, 2
9639     //   %c = setcc eq %b, 0
9640     //   brcond %c ...
9641     //
9642     // This applies only when the AND constant value has one bit set and the
9643     // SRL constant is equal to the log2 of the AND constant. The back-end is
9644     // smart enough to convert the result into a TEST/JMP sequence.
9645     SDValue Op0 = N1.getOperand(0);
9646     SDValue Op1 = N1.getOperand(1);
9647 
9648     if (Op0.getOpcode() == ISD::AND &&
9649         Op1.getOpcode() == ISD::Constant) {
9650       SDValue AndOp1 = Op0.getOperand(1);
9651 
9652       if (AndOp1.getOpcode() == ISD::Constant) {
9653         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
9654 
9655         if (AndConst.isPowerOf2() &&
9656             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
9657           SDLoc DL(N);
9658           SDValue SetCC =
9659             DAG.getSetCC(DL,
9660                          getSetCCResultType(Op0.getValueType()),
9661                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
9662                          ISD::SETNE);
9663 
9664           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
9665                                           MVT::Other, Chain, SetCC, N2);
9666           // Don't add the new BRCond into the worklist or else SimplifySelectCC
9667           // will convert it back to (X & C1) >> C2.
9668           CombineTo(N, NewBRCond, false);
9669           // Truncate is dead.
9670           if (Trunc)
9671             deleteAndRecombine(Trunc);
9672           // Replace the uses of SRL with SETCC
9673           WorklistRemover DeadNodes(*this);
9674           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9675           deleteAndRecombine(N1.getNode());
9676           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9677         }
9678       }
9679     }
9680 
9681     if (Trunc)
9682       // Restore N1 if the above transformation doesn't match.
9683       N1 = N->getOperand(1);
9684   }
9685 
9686   // Transform br(xor(x, y)) -> br(x != y)
9687   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
9688   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
9689     SDNode *TheXor = N1.getNode();
9690     SDValue Op0 = TheXor->getOperand(0);
9691     SDValue Op1 = TheXor->getOperand(1);
9692     if (Op0.getOpcode() == Op1.getOpcode()) {
9693       // Avoid missing important xor optimizations.
9694       if (SDValue Tmp = visitXOR(TheXor)) {
9695         if (Tmp.getNode() != TheXor) {
9696           DEBUG(dbgs() << "\nReplacing.8 ";
9697                 TheXor->dump(&DAG);
9698                 dbgs() << "\nWith: ";
9699                 Tmp.getNode()->dump(&DAG);
9700                 dbgs() << '\n');
9701           WorklistRemover DeadNodes(*this);
9702           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
9703           deleteAndRecombine(TheXor);
9704           return DAG.getNode(ISD::BRCOND, SDLoc(N),
9705                              MVT::Other, Chain, Tmp, N2);
9706         }
9707 
9708         // visitXOR has changed XOR's operands or replaced the XOR completely,
9709         // bail out.
9710         return SDValue(N, 0);
9711       }
9712     }
9713 
9714     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
9715       bool Equal = false;
9716       if (isOneConstant(Op0) && Op0.hasOneUse() &&
9717           Op0.getOpcode() == ISD::XOR) {
9718         TheXor = Op0.getNode();
9719         Equal = true;
9720       }
9721 
9722       EVT SetCCVT = N1.getValueType();
9723       if (LegalTypes)
9724         SetCCVT = getSetCCResultType(SetCCVT);
9725       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
9726                                    SetCCVT,
9727                                    Op0, Op1,
9728                                    Equal ? ISD::SETEQ : ISD::SETNE);
9729       // Replace the uses of XOR with SETCC
9730       WorklistRemover DeadNodes(*this);
9731       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9732       deleteAndRecombine(N1.getNode());
9733       return DAG.getNode(ISD::BRCOND, SDLoc(N),
9734                          MVT::Other, Chain, SetCC, N2);
9735     }
9736   }
9737 
9738   return SDValue();
9739 }
9740 
9741 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
9742 //
9743 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
9744   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
9745   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
9746 
9747   // If N is a constant we could fold this into a fallthrough or unconditional
9748   // branch. However that doesn't happen very often in normal code, because
9749   // Instcombine/SimplifyCFG should have handled the available opportunities.
9750   // If we did this folding here, it would be necessary to update the
9751   // MachineBasicBlock CFG, which is awkward.
9752 
9753   // Use SimplifySetCC to simplify SETCC's.
9754   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
9755                                CondLHS, CondRHS, CC->get(), SDLoc(N),
9756                                false);
9757   if (Simp.getNode()) AddToWorklist(Simp.getNode());
9758 
9759   // fold to a simpler setcc
9760   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
9761     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9762                        N->getOperand(0), Simp.getOperand(2),
9763                        Simp.getOperand(0), Simp.getOperand(1),
9764                        N->getOperand(4));
9765 
9766   return SDValue();
9767 }
9768 
9769 /// Return true if 'Use' is a load or a store that uses N as its base pointer
9770 /// and that N may be folded in the load / store addressing mode.
9771 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
9772                                     SelectionDAG &DAG,
9773                                     const TargetLowering &TLI) {
9774   EVT VT;
9775   unsigned AS;
9776 
9777   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
9778     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
9779       return false;
9780     VT = LD->getMemoryVT();
9781     AS = LD->getAddressSpace();
9782   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
9783     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
9784       return false;
9785     VT = ST->getMemoryVT();
9786     AS = ST->getAddressSpace();
9787   } else
9788     return false;
9789 
9790   TargetLowering::AddrMode AM;
9791   if (N->getOpcode() == ISD::ADD) {
9792     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9793     if (Offset)
9794       // [reg +/- imm]
9795       AM.BaseOffs = Offset->getSExtValue();
9796     else
9797       // [reg +/- reg]
9798       AM.Scale = 1;
9799   } else if (N->getOpcode() == ISD::SUB) {
9800     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9801     if (Offset)
9802       // [reg +/- imm]
9803       AM.BaseOffs = -Offset->getSExtValue();
9804     else
9805       // [reg +/- reg]
9806       AM.Scale = 1;
9807   } else
9808     return false;
9809 
9810   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
9811                                    VT.getTypeForEVT(*DAG.getContext()), AS);
9812 }
9813 
9814 /// Try turning a load/store into a pre-indexed load/store when the base
9815 /// pointer is an add or subtract and it has other uses besides the load/store.
9816 /// After the transformation, the new indexed load/store has effectively folded
9817 /// the add/subtract in and all of its other uses are redirected to the
9818 /// new load/store.
9819 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
9820   if (Level < AfterLegalizeDAG)
9821     return false;
9822 
9823   bool isLoad = true;
9824   SDValue Ptr;
9825   EVT VT;
9826   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
9827     if (LD->isIndexed())
9828       return false;
9829     VT = LD->getMemoryVT();
9830     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
9831         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
9832       return false;
9833     Ptr = LD->getBasePtr();
9834   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
9835     if (ST->isIndexed())
9836       return false;
9837     VT = ST->getMemoryVT();
9838     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
9839         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
9840       return false;
9841     Ptr = ST->getBasePtr();
9842     isLoad = false;
9843   } else {
9844     return false;
9845   }
9846 
9847   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
9848   // out.  There is no reason to make this a preinc/predec.
9849   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
9850       Ptr.getNode()->hasOneUse())
9851     return false;
9852 
9853   // Ask the target to do addressing mode selection.
9854   SDValue BasePtr;
9855   SDValue Offset;
9856   ISD::MemIndexedMode AM = ISD::UNINDEXED;
9857   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
9858     return false;
9859 
9860   // Backends without true r+i pre-indexed forms may need to pass a
9861   // constant base with a variable offset so that constant coercion
9862   // will work with the patterns in canonical form.
9863   bool Swapped = false;
9864   if (isa<ConstantSDNode>(BasePtr)) {
9865     std::swap(BasePtr, Offset);
9866     Swapped = true;
9867   }
9868 
9869   // Don't create a indexed load / store with zero offset.
9870   if (isNullConstant(Offset))
9871     return false;
9872 
9873   // Try turning it into a pre-indexed load / store except when:
9874   // 1) The new base ptr is a frame index.
9875   // 2) If N is a store and the new base ptr is either the same as or is a
9876   //    predecessor of the value being stored.
9877   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
9878   //    that would create a cycle.
9879   // 4) All uses are load / store ops that use it as old base ptr.
9880 
9881   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
9882   // (plus the implicit offset) to a register to preinc anyway.
9883   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
9884     return false;
9885 
9886   // Check #2.
9887   if (!isLoad) {
9888     SDValue Val = cast<StoreSDNode>(N)->getValue();
9889     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
9890       return false;
9891   }
9892 
9893   // Caches for hasPredecessorHelper.
9894   SmallPtrSet<const SDNode *, 32> Visited;
9895   SmallVector<const SDNode *, 16> Worklist;
9896   Worklist.push_back(N);
9897 
9898   // If the offset is a constant, there may be other adds of constants that
9899   // can be folded with this one. We should do this to avoid having to keep
9900   // a copy of the original base pointer.
9901   SmallVector<SDNode *, 16> OtherUses;
9902   if (isa<ConstantSDNode>(Offset))
9903     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
9904                               UE = BasePtr.getNode()->use_end();
9905          UI != UE; ++UI) {
9906       SDUse &Use = UI.getUse();
9907       // Skip the use that is Ptr and uses of other results from BasePtr's
9908       // node (important for nodes that return multiple results).
9909       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
9910         continue;
9911 
9912       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
9913         continue;
9914 
9915       if (Use.getUser()->getOpcode() != ISD::ADD &&
9916           Use.getUser()->getOpcode() != ISD::SUB) {
9917         OtherUses.clear();
9918         break;
9919       }
9920 
9921       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
9922       if (!isa<ConstantSDNode>(Op1)) {
9923         OtherUses.clear();
9924         break;
9925       }
9926 
9927       // FIXME: In some cases, we can be smarter about this.
9928       if (Op1.getValueType() != Offset.getValueType()) {
9929         OtherUses.clear();
9930         break;
9931       }
9932 
9933       OtherUses.push_back(Use.getUser());
9934     }
9935 
9936   if (Swapped)
9937     std::swap(BasePtr, Offset);
9938 
9939   // Now check for #3 and #4.
9940   bool RealUse = false;
9941 
9942   for (SDNode *Use : Ptr.getNode()->uses()) {
9943     if (Use == N)
9944       continue;
9945     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
9946       return false;
9947 
9948     // If Ptr may be folded in addressing mode of other use, then it's
9949     // not profitable to do this transformation.
9950     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
9951       RealUse = true;
9952   }
9953 
9954   if (!RealUse)
9955     return false;
9956 
9957   SDValue Result;
9958   if (isLoad)
9959     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
9960                                 BasePtr, Offset, AM);
9961   else
9962     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
9963                                  BasePtr, Offset, AM);
9964   ++PreIndexedNodes;
9965   ++NodesCombined;
9966   DEBUG(dbgs() << "\nReplacing.4 ";
9967         N->dump(&DAG);
9968         dbgs() << "\nWith: ";
9969         Result.getNode()->dump(&DAG);
9970         dbgs() << '\n');
9971   WorklistRemover DeadNodes(*this);
9972   if (isLoad) {
9973     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
9974     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
9975   } else {
9976     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
9977   }
9978 
9979   // Finally, since the node is now dead, remove it from the graph.
9980   deleteAndRecombine(N);
9981 
9982   if (Swapped)
9983     std::swap(BasePtr, Offset);
9984 
9985   // Replace other uses of BasePtr that can be updated to use Ptr
9986   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
9987     unsigned OffsetIdx = 1;
9988     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
9989       OffsetIdx = 0;
9990     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
9991            BasePtr.getNode() && "Expected BasePtr operand");
9992 
9993     // We need to replace ptr0 in the following expression:
9994     //   x0 * offset0 + y0 * ptr0 = t0
9995     // knowing that
9996     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
9997     //
9998     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
9999     // indexed load/store and the expresion that needs to be re-written.
10000     //
10001     // Therefore, we have:
10002     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
10003 
10004     ConstantSDNode *CN =
10005       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
10006     int X0, X1, Y0, Y1;
10007     const APInt &Offset0 = CN->getAPIntValue();
10008     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
10009 
10010     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
10011     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
10012     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
10013     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
10014 
10015     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
10016 
10017     APInt CNV = Offset0;
10018     if (X0 < 0) CNV = -CNV;
10019     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
10020     else CNV = CNV - Offset1;
10021 
10022     SDLoc DL(OtherUses[i]);
10023 
10024     // We can now generate the new expression.
10025     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
10026     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
10027 
10028     SDValue NewUse = DAG.getNode(Opcode,
10029                                  DL,
10030                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
10031     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
10032     deleteAndRecombine(OtherUses[i]);
10033   }
10034 
10035   // Replace the uses of Ptr with uses of the updated base value.
10036   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
10037   deleteAndRecombine(Ptr.getNode());
10038 
10039   return true;
10040 }
10041 
10042 /// Try to combine a load/store with a add/sub of the base pointer node into a
10043 /// post-indexed load/store. The transformation folded the add/subtract into the
10044 /// new indexed load/store effectively and all of its uses are redirected to the
10045 /// new load/store.
10046 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
10047   if (Level < AfterLegalizeDAG)
10048     return false;
10049 
10050   bool isLoad = true;
10051   SDValue Ptr;
10052   EVT VT;
10053   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
10054     if (LD->isIndexed())
10055       return false;
10056     VT = LD->getMemoryVT();
10057     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
10058         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
10059       return false;
10060     Ptr = LD->getBasePtr();
10061   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
10062     if (ST->isIndexed())
10063       return false;
10064     VT = ST->getMemoryVT();
10065     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
10066         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
10067       return false;
10068     Ptr = ST->getBasePtr();
10069     isLoad = false;
10070   } else {
10071     return false;
10072   }
10073 
10074   if (Ptr.getNode()->hasOneUse())
10075     return false;
10076 
10077   for (SDNode *Op : Ptr.getNode()->uses()) {
10078     if (Op == N ||
10079         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
10080       continue;
10081 
10082     SDValue BasePtr;
10083     SDValue Offset;
10084     ISD::MemIndexedMode AM = ISD::UNINDEXED;
10085     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
10086       // Don't create a indexed load / store with zero offset.
10087       if (isNullConstant(Offset))
10088         continue;
10089 
10090       // Try turning it into a post-indexed load / store except when
10091       // 1) All uses are load / store ops that use it as base ptr (and
10092       //    it may be folded as addressing mmode).
10093       // 2) Op must be independent of N, i.e. Op is neither a predecessor
10094       //    nor a successor of N. Otherwise, if Op is folded that would
10095       //    create a cycle.
10096 
10097       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10098         continue;
10099 
10100       // Check for #1.
10101       bool TryNext = false;
10102       for (SDNode *Use : BasePtr.getNode()->uses()) {
10103         if (Use == Ptr.getNode())
10104           continue;
10105 
10106         // If all the uses are load / store addresses, then don't do the
10107         // transformation.
10108         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
10109           bool RealUse = false;
10110           for (SDNode *UseUse : Use->uses()) {
10111             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
10112               RealUse = true;
10113           }
10114 
10115           if (!RealUse) {
10116             TryNext = true;
10117             break;
10118           }
10119         }
10120       }
10121 
10122       if (TryNext)
10123         continue;
10124 
10125       // Check for #2
10126       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
10127         SDValue Result = isLoad
10128           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
10129                                BasePtr, Offset, AM)
10130           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
10131                                 BasePtr, Offset, AM);
10132         ++PostIndexedNodes;
10133         ++NodesCombined;
10134         DEBUG(dbgs() << "\nReplacing.5 ";
10135               N->dump(&DAG);
10136               dbgs() << "\nWith: ";
10137               Result.getNode()->dump(&DAG);
10138               dbgs() << '\n');
10139         WorklistRemover DeadNodes(*this);
10140         if (isLoad) {
10141           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
10142           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
10143         } else {
10144           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
10145         }
10146 
10147         // Finally, since the node is now dead, remove it from the graph.
10148         deleteAndRecombine(N);
10149 
10150         // Replace the uses of Use with uses of the updated base value.
10151         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
10152                                       Result.getValue(isLoad ? 1 : 0));
10153         deleteAndRecombine(Op);
10154         return true;
10155       }
10156     }
10157   }
10158 
10159   return false;
10160 }
10161 
10162 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
10163 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
10164   ISD::MemIndexedMode AM = LD->getAddressingMode();
10165   assert(AM != ISD::UNINDEXED);
10166   SDValue BP = LD->getOperand(1);
10167   SDValue Inc = LD->getOperand(2);
10168 
10169   // Some backends use TargetConstants for load offsets, but don't expect
10170   // TargetConstants in general ADD nodes. We can convert these constants into
10171   // regular Constants (if the constant is not opaque).
10172   assert((Inc.getOpcode() != ISD::TargetConstant ||
10173           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
10174          "Cannot split out indexing using opaque target constants");
10175   if (Inc.getOpcode() == ISD::TargetConstant) {
10176     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
10177     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
10178                           ConstInc->getValueType(0));
10179   }
10180 
10181   unsigned Opc =
10182       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
10183   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
10184 }
10185 
10186 SDValue DAGCombiner::visitLOAD(SDNode *N) {
10187   LoadSDNode *LD  = cast<LoadSDNode>(N);
10188   SDValue Chain = LD->getChain();
10189   SDValue Ptr   = LD->getBasePtr();
10190 
10191   // If load is not volatile and there are no uses of the loaded value (and
10192   // the updated indexed value in case of indexed loads), change uses of the
10193   // chain value into uses of the chain input (i.e. delete the dead load).
10194   if (!LD->isVolatile()) {
10195     if (N->getValueType(1) == MVT::Other) {
10196       // Unindexed loads.
10197       if (!N->hasAnyUseOfValue(0)) {
10198         // It's not safe to use the two value CombineTo variant here. e.g.
10199         // v1, chain2 = load chain1, loc
10200         // v2, chain3 = load chain2, loc
10201         // v3         = add v2, c
10202         // Now we replace use of chain2 with chain1.  This makes the second load
10203         // isomorphic to the one we are deleting, and thus makes this load live.
10204         DEBUG(dbgs() << "\nReplacing.6 ";
10205               N->dump(&DAG);
10206               dbgs() << "\nWith chain: ";
10207               Chain.getNode()->dump(&DAG);
10208               dbgs() << "\n");
10209         WorklistRemover DeadNodes(*this);
10210         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10211 
10212         if (N->use_empty())
10213           deleteAndRecombine(N);
10214 
10215         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10216       }
10217     } else {
10218       // Indexed loads.
10219       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
10220 
10221       // If this load has an opaque TargetConstant offset, then we cannot split
10222       // the indexing into an add/sub directly (that TargetConstant may not be
10223       // valid for a different type of node, and we cannot convert an opaque
10224       // target constant into a regular constant).
10225       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
10226                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
10227 
10228       if (!N->hasAnyUseOfValue(0) &&
10229           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
10230         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
10231         SDValue Index;
10232         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
10233           Index = SplitIndexingFromLoad(LD);
10234           // Try to fold the base pointer arithmetic into subsequent loads and
10235           // stores.
10236           AddUsersToWorklist(N);
10237         } else
10238           Index = DAG.getUNDEF(N->getValueType(1));
10239         DEBUG(dbgs() << "\nReplacing.7 ";
10240               N->dump(&DAG);
10241               dbgs() << "\nWith: ";
10242               Undef.getNode()->dump(&DAG);
10243               dbgs() << " and 2 other values\n");
10244         WorklistRemover DeadNodes(*this);
10245         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
10246         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
10247         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
10248         deleteAndRecombine(N);
10249         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10250       }
10251     }
10252   }
10253 
10254   // If this load is directly stored, replace the load value with the stored
10255   // value.
10256   // TODO: Handle store large -> read small portion.
10257   // TODO: Handle TRUNCSTORE/LOADEXT
10258   if (OptLevel != CodeGenOpt::None &&
10259       ISD::isNormalLoad(N) && !LD->isVolatile()) {
10260     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
10261       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
10262       if (PrevST->getBasePtr() == Ptr &&
10263           PrevST->getValue().getValueType() == N->getValueType(0))
10264       return CombineTo(N, Chain.getOperand(1), Chain);
10265     }
10266   }
10267 
10268   // Try to infer better alignment information than the load already has.
10269   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
10270     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
10271       if (Align > LD->getMemOperand()->getBaseAlignment()) {
10272         SDValue NewLoad = DAG.getExtLoad(
10273             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
10274             LD->getPointerInfo(), LD->getMemoryVT(), Align,
10275             LD->getMemOperand()->getFlags(), LD->getAAInfo());
10276         if (NewLoad.getNode() != N)
10277           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
10278       }
10279     }
10280   }
10281 
10282   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
10283                                                   : DAG.getSubtarget().useAA();
10284 #ifndef NDEBUG
10285   if (CombinerAAOnlyFunc.getNumOccurrences() &&
10286       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
10287     UseAA = false;
10288 #endif
10289   if (UseAA && LD->isUnindexed()) {
10290     // Walk up chain skipping non-aliasing memory nodes.
10291     SDValue BetterChain = FindBetterChain(N, Chain);
10292 
10293     // If there is a better chain.
10294     if (Chain != BetterChain) {
10295       SDValue ReplLoad;
10296 
10297       // Replace the chain to void dependency.
10298       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
10299         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
10300                                BetterChain, Ptr, LD->getMemOperand());
10301       } else {
10302         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
10303                                   LD->getValueType(0),
10304                                   BetterChain, Ptr, LD->getMemoryVT(),
10305                                   LD->getMemOperand());
10306       }
10307 
10308       // Create token factor to keep old chain connected.
10309       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
10310                                   MVT::Other, Chain, ReplLoad.getValue(1));
10311 
10312       // Make sure the new and old chains are cleaned up.
10313       AddToWorklist(Token.getNode());
10314 
10315       // Replace uses with load result and token factor. Don't add users
10316       // to work list.
10317       return CombineTo(N, ReplLoad.getValue(0), Token, false);
10318     }
10319   }
10320 
10321   // Try transforming N to an indexed load.
10322   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10323     return SDValue(N, 0);
10324 
10325   // Try to slice up N to more direct loads if the slices are mapped to
10326   // different register banks or pairing can take place.
10327   if (SliceUpLoad(N))
10328     return SDValue(N, 0);
10329 
10330   return SDValue();
10331 }
10332 
10333 namespace {
10334 /// \brief Helper structure used to slice a load in smaller loads.
10335 /// Basically a slice is obtained from the following sequence:
10336 /// Origin = load Ty1, Base
10337 /// Shift = srl Ty1 Origin, CstTy Amount
10338 /// Inst = trunc Shift to Ty2
10339 ///
10340 /// Then, it will be rewriten into:
10341 /// Slice = load SliceTy, Base + SliceOffset
10342 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
10343 ///
10344 /// SliceTy is deduced from the number of bits that are actually used to
10345 /// build Inst.
10346 struct LoadedSlice {
10347   /// \brief Helper structure used to compute the cost of a slice.
10348   struct Cost {
10349     /// Are we optimizing for code size.
10350     bool ForCodeSize;
10351     /// Various cost.
10352     unsigned Loads;
10353     unsigned Truncates;
10354     unsigned CrossRegisterBanksCopies;
10355     unsigned ZExts;
10356     unsigned Shift;
10357 
10358     Cost(bool ForCodeSize = false)
10359         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
10360           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
10361 
10362     /// \brief Get the cost of one isolated slice.
10363     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
10364         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
10365           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
10366       EVT TruncType = LS.Inst->getValueType(0);
10367       EVT LoadedType = LS.getLoadedType();
10368       if (TruncType != LoadedType &&
10369           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
10370         ZExts = 1;
10371     }
10372 
10373     /// \brief Account for slicing gain in the current cost.
10374     /// Slicing provide a few gains like removing a shift or a
10375     /// truncate. This method allows to grow the cost of the original
10376     /// load with the gain from this slice.
10377     void addSliceGain(const LoadedSlice &LS) {
10378       // Each slice saves a truncate.
10379       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
10380       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
10381                               LS.Inst->getValueType(0)))
10382         ++Truncates;
10383       // If there is a shift amount, this slice gets rid of it.
10384       if (LS.Shift)
10385         ++Shift;
10386       // If this slice can merge a cross register bank copy, account for it.
10387       if (LS.canMergeExpensiveCrossRegisterBankCopy())
10388         ++CrossRegisterBanksCopies;
10389     }
10390 
10391     Cost &operator+=(const Cost &RHS) {
10392       Loads += RHS.Loads;
10393       Truncates += RHS.Truncates;
10394       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
10395       ZExts += RHS.ZExts;
10396       Shift += RHS.Shift;
10397       return *this;
10398     }
10399 
10400     bool operator==(const Cost &RHS) const {
10401       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
10402              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
10403              ZExts == RHS.ZExts && Shift == RHS.Shift;
10404     }
10405 
10406     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
10407 
10408     bool operator<(const Cost &RHS) const {
10409       // Assume cross register banks copies are as expensive as loads.
10410       // FIXME: Do we want some more target hooks?
10411       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
10412       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
10413       // Unless we are optimizing for code size, consider the
10414       // expensive operation first.
10415       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
10416         return ExpensiveOpsLHS < ExpensiveOpsRHS;
10417       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
10418              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
10419     }
10420 
10421     bool operator>(const Cost &RHS) const { return RHS < *this; }
10422 
10423     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
10424 
10425     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
10426   };
10427   // The last instruction that represent the slice. This should be a
10428   // truncate instruction.
10429   SDNode *Inst;
10430   // The original load instruction.
10431   LoadSDNode *Origin;
10432   // The right shift amount in bits from the original load.
10433   unsigned Shift;
10434   // The DAG from which Origin came from.
10435   // This is used to get some contextual information about legal types, etc.
10436   SelectionDAG *DAG;
10437 
10438   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
10439               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
10440       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
10441 
10442   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
10443   /// \return Result is \p BitWidth and has used bits set to 1 and
10444   ///         not used bits set to 0.
10445   APInt getUsedBits() const {
10446     // Reproduce the trunc(lshr) sequence:
10447     // - Start from the truncated value.
10448     // - Zero extend to the desired bit width.
10449     // - Shift left.
10450     assert(Origin && "No original load to compare against.");
10451     unsigned BitWidth = Origin->getValueSizeInBits(0);
10452     assert(Inst && "This slice is not bound to an instruction");
10453     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
10454            "Extracted slice is bigger than the whole type!");
10455     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
10456     UsedBits.setAllBits();
10457     UsedBits = UsedBits.zext(BitWidth);
10458     UsedBits <<= Shift;
10459     return UsedBits;
10460   }
10461 
10462   /// \brief Get the size of the slice to be loaded in bytes.
10463   unsigned getLoadedSize() const {
10464     unsigned SliceSize = getUsedBits().countPopulation();
10465     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
10466     return SliceSize / 8;
10467   }
10468 
10469   /// \brief Get the type that will be loaded for this slice.
10470   /// Note: This may not be the final type for the slice.
10471   EVT getLoadedType() const {
10472     assert(DAG && "Missing context");
10473     LLVMContext &Ctxt = *DAG->getContext();
10474     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
10475   }
10476 
10477   /// \brief Get the alignment of the load used for this slice.
10478   unsigned getAlignment() const {
10479     unsigned Alignment = Origin->getAlignment();
10480     unsigned Offset = getOffsetFromBase();
10481     if (Offset != 0)
10482       Alignment = MinAlign(Alignment, Alignment + Offset);
10483     return Alignment;
10484   }
10485 
10486   /// \brief Check if this slice can be rewritten with legal operations.
10487   bool isLegal() const {
10488     // An invalid slice is not legal.
10489     if (!Origin || !Inst || !DAG)
10490       return false;
10491 
10492     // Offsets are for indexed load only, we do not handle that.
10493     if (!Origin->getOffset().isUndef())
10494       return false;
10495 
10496     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10497 
10498     // Check that the type is legal.
10499     EVT SliceType = getLoadedType();
10500     if (!TLI.isTypeLegal(SliceType))
10501       return false;
10502 
10503     // Check that the load is legal for this type.
10504     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
10505       return false;
10506 
10507     // Check that the offset can be computed.
10508     // 1. Check its type.
10509     EVT PtrType = Origin->getBasePtr().getValueType();
10510     if (PtrType == MVT::Untyped || PtrType.isExtended())
10511       return false;
10512 
10513     // 2. Check that it fits in the immediate.
10514     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
10515       return false;
10516 
10517     // 3. Check that the computation is legal.
10518     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
10519       return false;
10520 
10521     // Check that the zext is legal if it needs one.
10522     EVT TruncateType = Inst->getValueType(0);
10523     if (TruncateType != SliceType &&
10524         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
10525       return false;
10526 
10527     return true;
10528   }
10529 
10530   /// \brief Get the offset in bytes of this slice in the original chunk of
10531   /// bits.
10532   /// \pre DAG != nullptr.
10533   uint64_t getOffsetFromBase() const {
10534     assert(DAG && "Missing context.");
10535     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
10536     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
10537     uint64_t Offset = Shift / 8;
10538     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
10539     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
10540            "The size of the original loaded type is not a multiple of a"
10541            " byte.");
10542     // If Offset is bigger than TySizeInBytes, it means we are loading all
10543     // zeros. This should have been optimized before in the process.
10544     assert(TySizeInBytes > Offset &&
10545            "Invalid shift amount for given loaded size");
10546     if (IsBigEndian)
10547       Offset = TySizeInBytes - Offset - getLoadedSize();
10548     return Offset;
10549   }
10550 
10551   /// \brief Generate the sequence of instructions to load the slice
10552   /// represented by this object and redirect the uses of this slice to
10553   /// this new sequence of instructions.
10554   /// \pre this->Inst && this->Origin are valid Instructions and this
10555   /// object passed the legal check: LoadedSlice::isLegal returned true.
10556   /// \return The last instruction of the sequence used to load the slice.
10557   SDValue loadSlice() const {
10558     assert(Inst && Origin && "Unable to replace a non-existing slice.");
10559     const SDValue &OldBaseAddr = Origin->getBasePtr();
10560     SDValue BaseAddr = OldBaseAddr;
10561     // Get the offset in that chunk of bytes w.r.t. the endianess.
10562     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
10563     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
10564     if (Offset) {
10565       // BaseAddr = BaseAddr + Offset.
10566       EVT ArithType = BaseAddr.getValueType();
10567       SDLoc DL(Origin);
10568       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
10569                               DAG->getConstant(Offset, DL, ArithType));
10570     }
10571 
10572     // Create the type of the loaded slice according to its size.
10573     EVT SliceType = getLoadedType();
10574 
10575     // Create the load for the slice.
10576     SDValue LastInst =
10577         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
10578                      Origin->getPointerInfo().getWithOffset(Offset),
10579                      getAlignment(), Origin->getMemOperand()->getFlags());
10580     // If the final type is not the same as the loaded type, this means that
10581     // we have to pad with zero. Create a zero extend for that.
10582     EVT FinalType = Inst->getValueType(0);
10583     if (SliceType != FinalType)
10584       LastInst =
10585           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
10586     return LastInst;
10587   }
10588 
10589   /// \brief Check if this slice can be merged with an expensive cross register
10590   /// bank copy. E.g.,
10591   /// i = load i32
10592   /// f = bitcast i32 i to float
10593   bool canMergeExpensiveCrossRegisterBankCopy() const {
10594     if (!Inst || !Inst->hasOneUse())
10595       return false;
10596     SDNode *Use = *Inst->use_begin();
10597     if (Use->getOpcode() != ISD::BITCAST)
10598       return false;
10599     assert(DAG && "Missing context");
10600     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10601     EVT ResVT = Use->getValueType(0);
10602     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
10603     const TargetRegisterClass *ArgRC =
10604         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
10605     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
10606       return false;
10607 
10608     // At this point, we know that we perform a cross-register-bank copy.
10609     // Check if it is expensive.
10610     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
10611     // Assume bitcasts are cheap, unless both register classes do not
10612     // explicitly share a common sub class.
10613     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
10614       return false;
10615 
10616     // Check if it will be merged with the load.
10617     // 1. Check the alignment constraint.
10618     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
10619         ResVT.getTypeForEVT(*DAG->getContext()));
10620 
10621     if (RequiredAlignment > getAlignment())
10622       return false;
10623 
10624     // 2. Check that the load is a legal operation for that type.
10625     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
10626       return false;
10627 
10628     // 3. Check that we do not have a zext in the way.
10629     if (Inst->getValueType(0) != getLoadedType())
10630       return false;
10631 
10632     return true;
10633   }
10634 };
10635 }
10636 
10637 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
10638 /// \p UsedBits looks like 0..0 1..1 0..0.
10639 static bool areUsedBitsDense(const APInt &UsedBits) {
10640   // If all the bits are one, this is dense!
10641   if (UsedBits.isAllOnesValue())
10642     return true;
10643 
10644   // Get rid of the unused bits on the right.
10645   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
10646   // Get rid of the unused bits on the left.
10647   if (NarrowedUsedBits.countLeadingZeros())
10648     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
10649   // Check that the chunk of bits is completely used.
10650   return NarrowedUsedBits.isAllOnesValue();
10651 }
10652 
10653 /// \brief Check whether or not \p First and \p Second are next to each other
10654 /// in memory. This means that there is no hole between the bits loaded
10655 /// by \p First and the bits loaded by \p Second.
10656 static bool areSlicesNextToEachOther(const LoadedSlice &First,
10657                                      const LoadedSlice &Second) {
10658   assert(First.Origin == Second.Origin && First.Origin &&
10659          "Unable to match different memory origins.");
10660   APInt UsedBits = First.getUsedBits();
10661   assert((UsedBits & Second.getUsedBits()) == 0 &&
10662          "Slices are not supposed to overlap.");
10663   UsedBits |= Second.getUsedBits();
10664   return areUsedBitsDense(UsedBits);
10665 }
10666 
10667 /// \brief Adjust the \p GlobalLSCost according to the target
10668 /// paring capabilities and the layout of the slices.
10669 /// \pre \p GlobalLSCost should account for at least as many loads as
10670 /// there is in the slices in \p LoadedSlices.
10671 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10672                                  LoadedSlice::Cost &GlobalLSCost) {
10673   unsigned NumberOfSlices = LoadedSlices.size();
10674   // If there is less than 2 elements, no pairing is possible.
10675   if (NumberOfSlices < 2)
10676     return;
10677 
10678   // Sort the slices so that elements that are likely to be next to each
10679   // other in memory are next to each other in the list.
10680   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
10681             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
10682     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
10683     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
10684   });
10685   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
10686   // First (resp. Second) is the first (resp. Second) potentially candidate
10687   // to be placed in a paired load.
10688   const LoadedSlice *First = nullptr;
10689   const LoadedSlice *Second = nullptr;
10690   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
10691                 // Set the beginning of the pair.
10692                                                            First = Second) {
10693 
10694     Second = &LoadedSlices[CurrSlice];
10695 
10696     // If First is NULL, it means we start a new pair.
10697     // Get to the next slice.
10698     if (!First)
10699       continue;
10700 
10701     EVT LoadedType = First->getLoadedType();
10702 
10703     // If the types of the slices are different, we cannot pair them.
10704     if (LoadedType != Second->getLoadedType())
10705       continue;
10706 
10707     // Check if the target supplies paired loads for this type.
10708     unsigned RequiredAlignment = 0;
10709     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
10710       // move to the next pair, this type is hopeless.
10711       Second = nullptr;
10712       continue;
10713     }
10714     // Check if we meet the alignment requirement.
10715     if (RequiredAlignment > First->getAlignment())
10716       continue;
10717 
10718     // Check that both loads are next to each other in memory.
10719     if (!areSlicesNextToEachOther(*First, *Second))
10720       continue;
10721 
10722     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
10723     --GlobalLSCost.Loads;
10724     // Move to the next pair.
10725     Second = nullptr;
10726   }
10727 }
10728 
10729 /// \brief Check the profitability of all involved LoadedSlice.
10730 /// Currently, it is considered profitable if there is exactly two
10731 /// involved slices (1) which are (2) next to each other in memory, and
10732 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
10733 ///
10734 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
10735 /// the elements themselves.
10736 ///
10737 /// FIXME: When the cost model will be mature enough, we can relax
10738 /// constraints (1) and (2).
10739 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10740                                 const APInt &UsedBits, bool ForCodeSize) {
10741   unsigned NumberOfSlices = LoadedSlices.size();
10742   if (StressLoadSlicing)
10743     return NumberOfSlices > 1;
10744 
10745   // Check (1).
10746   if (NumberOfSlices != 2)
10747     return false;
10748 
10749   // Check (2).
10750   if (!areUsedBitsDense(UsedBits))
10751     return false;
10752 
10753   // Check (3).
10754   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
10755   // The original code has one big load.
10756   OrigCost.Loads = 1;
10757   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
10758     const LoadedSlice &LS = LoadedSlices[CurrSlice];
10759     // Accumulate the cost of all the slices.
10760     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
10761     GlobalSlicingCost += SliceCost;
10762 
10763     // Account as cost in the original configuration the gain obtained
10764     // with the current slices.
10765     OrigCost.addSliceGain(LS);
10766   }
10767 
10768   // If the target supports paired load, adjust the cost accordingly.
10769   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
10770   return OrigCost > GlobalSlicingCost;
10771 }
10772 
10773 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
10774 /// operations, split it in the various pieces being extracted.
10775 ///
10776 /// This sort of thing is introduced by SROA.
10777 /// This slicing takes care not to insert overlapping loads.
10778 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
10779 bool DAGCombiner::SliceUpLoad(SDNode *N) {
10780   if (Level < AfterLegalizeDAG)
10781     return false;
10782 
10783   LoadSDNode *LD = cast<LoadSDNode>(N);
10784   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
10785       !LD->getValueType(0).isInteger())
10786     return false;
10787 
10788   // Keep track of already used bits to detect overlapping values.
10789   // In that case, we will just abort the transformation.
10790   APInt UsedBits(LD->getValueSizeInBits(0), 0);
10791 
10792   SmallVector<LoadedSlice, 4> LoadedSlices;
10793 
10794   // Check if this load is used as several smaller chunks of bits.
10795   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
10796   // of computation for each trunc.
10797   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
10798        UI != UIEnd; ++UI) {
10799     // Skip the uses of the chain.
10800     if (UI.getUse().getResNo() != 0)
10801       continue;
10802 
10803     SDNode *User = *UI;
10804     unsigned Shift = 0;
10805 
10806     // Check if this is a trunc(lshr).
10807     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
10808         isa<ConstantSDNode>(User->getOperand(1))) {
10809       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
10810       User = *User->use_begin();
10811     }
10812 
10813     // At this point, User is a Truncate, iff we encountered, trunc or
10814     // trunc(lshr).
10815     if (User->getOpcode() != ISD::TRUNCATE)
10816       return false;
10817 
10818     // The width of the type must be a power of 2 and greater than 8-bits.
10819     // Otherwise the load cannot be represented in LLVM IR.
10820     // Moreover, if we shifted with a non-8-bits multiple, the slice
10821     // will be across several bytes. We do not support that.
10822     unsigned Width = User->getValueSizeInBits(0);
10823     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
10824       return 0;
10825 
10826     // Build the slice for this chain of computations.
10827     LoadedSlice LS(User, LD, Shift, &DAG);
10828     APInt CurrentUsedBits = LS.getUsedBits();
10829 
10830     // Check if this slice overlaps with another.
10831     if ((CurrentUsedBits & UsedBits) != 0)
10832       return false;
10833     // Update the bits used globally.
10834     UsedBits |= CurrentUsedBits;
10835 
10836     // Check if the new slice would be legal.
10837     if (!LS.isLegal())
10838       return false;
10839 
10840     // Record the slice.
10841     LoadedSlices.push_back(LS);
10842   }
10843 
10844   // Abort slicing if it does not seem to be profitable.
10845   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
10846     return false;
10847 
10848   ++SlicedLoads;
10849 
10850   // Rewrite each chain to use an independent load.
10851   // By construction, each chain can be represented by a unique load.
10852 
10853   // Prepare the argument for the new token factor for all the slices.
10854   SmallVector<SDValue, 8> ArgChains;
10855   for (SmallVectorImpl<LoadedSlice>::const_iterator
10856            LSIt = LoadedSlices.begin(),
10857            LSItEnd = LoadedSlices.end();
10858        LSIt != LSItEnd; ++LSIt) {
10859     SDValue SliceInst = LSIt->loadSlice();
10860     CombineTo(LSIt->Inst, SliceInst, true);
10861     if (SliceInst.getOpcode() != ISD::LOAD)
10862       SliceInst = SliceInst.getOperand(0);
10863     assert(SliceInst->getOpcode() == ISD::LOAD &&
10864            "It takes more than a zext to get to the loaded slice!!");
10865     ArgChains.push_back(SliceInst.getValue(1));
10866   }
10867 
10868   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
10869                               ArgChains);
10870   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10871   return true;
10872 }
10873 
10874 /// Check to see if V is (and load (ptr), imm), where the load is having
10875 /// specific bytes cleared out.  If so, return the byte size being masked out
10876 /// and the shift amount.
10877 static std::pair<unsigned, unsigned>
10878 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
10879   std::pair<unsigned, unsigned> Result(0, 0);
10880 
10881   // Check for the structure we're looking for.
10882   if (V->getOpcode() != ISD::AND ||
10883       !isa<ConstantSDNode>(V->getOperand(1)) ||
10884       !ISD::isNormalLoad(V->getOperand(0).getNode()))
10885     return Result;
10886 
10887   // Check the chain and pointer.
10888   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
10889   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
10890 
10891   // The store should be chained directly to the load or be an operand of a
10892   // tokenfactor.
10893   if (LD == Chain.getNode())
10894     ; // ok.
10895   else if (Chain->getOpcode() != ISD::TokenFactor)
10896     return Result; // Fail.
10897   else {
10898     bool isOk = false;
10899     for (const SDValue &ChainOp : Chain->op_values())
10900       if (ChainOp.getNode() == LD) {
10901         isOk = true;
10902         break;
10903       }
10904     if (!isOk) return Result;
10905   }
10906 
10907   // This only handles simple types.
10908   if (V.getValueType() != MVT::i16 &&
10909       V.getValueType() != MVT::i32 &&
10910       V.getValueType() != MVT::i64)
10911     return Result;
10912 
10913   // Check the constant mask.  Invert it so that the bits being masked out are
10914   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
10915   // follow the sign bit for uniformity.
10916   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
10917   unsigned NotMaskLZ = countLeadingZeros(NotMask);
10918   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
10919   unsigned NotMaskTZ = countTrailingZeros(NotMask);
10920   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
10921   if (NotMaskLZ == 64) return Result;  // All zero mask.
10922 
10923   // See if we have a continuous run of bits.  If so, we have 0*1+0*
10924   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
10925     return Result;
10926 
10927   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
10928   if (V.getValueType() != MVT::i64 && NotMaskLZ)
10929     NotMaskLZ -= 64-V.getValueSizeInBits();
10930 
10931   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
10932   switch (MaskedBytes) {
10933   case 1:
10934   case 2:
10935   case 4: break;
10936   default: return Result; // All one mask, or 5-byte mask.
10937   }
10938 
10939   // Verify that the first bit starts at a multiple of mask so that the access
10940   // is aligned the same as the access width.
10941   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
10942 
10943   Result.first = MaskedBytes;
10944   Result.second = NotMaskTZ/8;
10945   return Result;
10946 }
10947 
10948 
10949 /// Check to see if IVal is something that provides a value as specified by
10950 /// MaskInfo. If so, replace the specified store with a narrower store of
10951 /// truncated IVal.
10952 static SDNode *
10953 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
10954                                 SDValue IVal, StoreSDNode *St,
10955                                 DAGCombiner *DC) {
10956   unsigned NumBytes = MaskInfo.first;
10957   unsigned ByteShift = MaskInfo.second;
10958   SelectionDAG &DAG = DC->getDAG();
10959 
10960   // Check to see if IVal is all zeros in the part being masked in by the 'or'
10961   // that uses this.  If not, this is not a replacement.
10962   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
10963                                   ByteShift*8, (ByteShift+NumBytes)*8);
10964   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
10965 
10966   // Check that it is legal on the target to do this.  It is legal if the new
10967   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
10968   // legalization.
10969   MVT VT = MVT::getIntegerVT(NumBytes*8);
10970   if (!DC->isTypeLegal(VT))
10971     return nullptr;
10972 
10973   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
10974   // shifted by ByteShift and truncated down to NumBytes.
10975   if (ByteShift) {
10976     SDLoc DL(IVal);
10977     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
10978                        DAG.getConstant(ByteShift*8, DL,
10979                                     DC->getShiftAmountTy(IVal.getValueType())));
10980   }
10981 
10982   // Figure out the offset for the store and the alignment of the access.
10983   unsigned StOffset;
10984   unsigned NewAlign = St->getAlignment();
10985 
10986   if (DAG.getDataLayout().isLittleEndian())
10987     StOffset = ByteShift;
10988   else
10989     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
10990 
10991   SDValue Ptr = St->getBasePtr();
10992   if (StOffset) {
10993     SDLoc DL(IVal);
10994     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
10995                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
10996     NewAlign = MinAlign(NewAlign, StOffset);
10997   }
10998 
10999   // Truncate down to the new size.
11000   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
11001 
11002   ++OpsNarrowed;
11003   return DAG
11004       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
11005                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
11006       .getNode();
11007 }
11008 
11009 
11010 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
11011 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
11012 /// narrowing the load and store if it would end up being a win for performance
11013 /// or code size.
11014 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
11015   StoreSDNode *ST  = cast<StoreSDNode>(N);
11016   if (ST->isVolatile())
11017     return SDValue();
11018 
11019   SDValue Chain = ST->getChain();
11020   SDValue Value = ST->getValue();
11021   SDValue Ptr   = ST->getBasePtr();
11022   EVT VT = Value.getValueType();
11023 
11024   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
11025     return SDValue();
11026 
11027   unsigned Opc = Value.getOpcode();
11028 
11029   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
11030   // is a byte mask indicating a consecutive number of bytes, check to see if
11031   // Y is known to provide just those bytes.  If so, we try to replace the
11032   // load + replace + store sequence with a single (narrower) store, which makes
11033   // the load dead.
11034   if (Opc == ISD::OR) {
11035     std::pair<unsigned, unsigned> MaskedLoad;
11036     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
11037     if (MaskedLoad.first)
11038       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11039                                                   Value.getOperand(1), ST,this))
11040         return SDValue(NewST, 0);
11041 
11042     // Or is commutative, so try swapping X and Y.
11043     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
11044     if (MaskedLoad.first)
11045       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11046                                                   Value.getOperand(0), ST,this))
11047         return SDValue(NewST, 0);
11048   }
11049 
11050   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
11051       Value.getOperand(1).getOpcode() != ISD::Constant)
11052     return SDValue();
11053 
11054   SDValue N0 = Value.getOperand(0);
11055   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11056       Chain == SDValue(N0.getNode(), 1)) {
11057     LoadSDNode *LD = cast<LoadSDNode>(N0);
11058     if (LD->getBasePtr() != Ptr ||
11059         LD->getPointerInfo().getAddrSpace() !=
11060         ST->getPointerInfo().getAddrSpace())
11061       return SDValue();
11062 
11063     // Find the type to narrow it the load / op / store to.
11064     SDValue N1 = Value.getOperand(1);
11065     unsigned BitWidth = N1.getValueSizeInBits();
11066     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
11067     if (Opc == ISD::AND)
11068       Imm ^= APInt::getAllOnesValue(BitWidth);
11069     if (Imm == 0 || Imm.isAllOnesValue())
11070       return SDValue();
11071     unsigned ShAmt = Imm.countTrailingZeros();
11072     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
11073     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
11074     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11075     // The narrowing should be profitable, the load/store operation should be
11076     // legal (or custom) and the store size should be equal to the NewVT width.
11077     while (NewBW < BitWidth &&
11078            (NewVT.getStoreSizeInBits() != NewBW ||
11079             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
11080             !TLI.isNarrowingProfitable(VT, NewVT))) {
11081       NewBW = NextPowerOf2(NewBW);
11082       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11083     }
11084     if (NewBW >= BitWidth)
11085       return SDValue();
11086 
11087     // If the lsb changed does not start at the type bitwidth boundary,
11088     // start at the previous one.
11089     if (ShAmt % NewBW)
11090       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
11091     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
11092                                    std::min(BitWidth, ShAmt + NewBW));
11093     if ((Imm & Mask) == Imm) {
11094       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
11095       if (Opc == ISD::AND)
11096         NewImm ^= APInt::getAllOnesValue(NewBW);
11097       uint64_t PtrOff = ShAmt / 8;
11098       // For big endian targets, we need to adjust the offset to the pointer to
11099       // load the correct bytes.
11100       if (DAG.getDataLayout().isBigEndian())
11101         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
11102 
11103       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
11104       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
11105       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
11106         return SDValue();
11107 
11108       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
11109                                    Ptr.getValueType(), Ptr,
11110                                    DAG.getConstant(PtrOff, SDLoc(LD),
11111                                                    Ptr.getValueType()));
11112       SDValue NewLD =
11113           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
11114                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11115                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
11116       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
11117                                    DAG.getConstant(NewImm, SDLoc(Value),
11118                                                    NewVT));
11119       SDValue NewST =
11120           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
11121                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
11122 
11123       AddToWorklist(NewPtr.getNode());
11124       AddToWorklist(NewLD.getNode());
11125       AddToWorklist(NewVal.getNode());
11126       WorklistRemover DeadNodes(*this);
11127       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
11128       ++OpsNarrowed;
11129       return NewST;
11130     }
11131   }
11132 
11133   return SDValue();
11134 }
11135 
11136 /// For a given floating point load / store pair, if the load value isn't used
11137 /// by any other operations, then consider transforming the pair to integer
11138 /// load / store operations if the target deems the transformation profitable.
11139 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
11140   StoreSDNode *ST  = cast<StoreSDNode>(N);
11141   SDValue Chain = ST->getChain();
11142   SDValue Value = ST->getValue();
11143   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
11144       Value.hasOneUse() &&
11145       Chain == SDValue(Value.getNode(), 1)) {
11146     LoadSDNode *LD = cast<LoadSDNode>(Value);
11147     EVT VT = LD->getMemoryVT();
11148     if (!VT.isFloatingPoint() ||
11149         VT != ST->getMemoryVT() ||
11150         LD->isNonTemporal() ||
11151         ST->isNonTemporal() ||
11152         LD->getPointerInfo().getAddrSpace() != 0 ||
11153         ST->getPointerInfo().getAddrSpace() != 0)
11154       return SDValue();
11155 
11156     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11157     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
11158         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
11159         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
11160         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
11161       return SDValue();
11162 
11163     unsigned LDAlign = LD->getAlignment();
11164     unsigned STAlign = ST->getAlignment();
11165     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
11166     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
11167     if (LDAlign < ABIAlign || STAlign < ABIAlign)
11168       return SDValue();
11169 
11170     SDValue NewLD =
11171         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
11172                     LD->getPointerInfo(), LDAlign);
11173 
11174     SDValue NewST =
11175         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
11176                      ST->getPointerInfo(), STAlign);
11177 
11178     AddToWorklist(NewLD.getNode());
11179     AddToWorklist(NewST.getNode());
11180     WorklistRemover DeadNodes(*this);
11181     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
11182     ++LdStFP2Int;
11183     return NewST;
11184   }
11185 
11186   return SDValue();
11187 }
11188 
11189 namespace {
11190 /// Helper struct to parse and store a memory address as base + index + offset.
11191 /// We ignore sign extensions when it is safe to do so.
11192 /// The following two expressions are not equivalent. To differentiate we need
11193 /// to store whether there was a sign extension involved in the index
11194 /// computation.
11195 ///  (load (i64 add (i64 copyfromreg %c)
11196 ///                 (i64 signextend (add (i8 load %index)
11197 ///                                      (i8 1))))
11198 /// vs
11199 ///
11200 /// (load (i64 add (i64 copyfromreg %c)
11201 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
11202 ///                                         (i32 1)))))
11203 struct BaseIndexOffset {
11204   SDValue Base;
11205   SDValue Index;
11206   int64_t Offset;
11207   bool IsIndexSignExt;
11208 
11209   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
11210 
11211   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
11212                   bool IsIndexSignExt) :
11213     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
11214 
11215   bool equalBaseIndex(const BaseIndexOffset &Other) {
11216     return Other.Base == Base && Other.Index == Index &&
11217       Other.IsIndexSignExt == IsIndexSignExt;
11218   }
11219 
11220   /// Parses tree in Ptr for base, index, offset addresses.
11221   static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
11222     bool IsIndexSignExt = false;
11223 
11224     // Split up a folded GlobalAddress+Offset into its component parts.
11225     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
11226       if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
11227         return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
11228                                                     SDLoc(GA),
11229                                                     GA->getValueType(0),
11230                                                     /*Offset=*/0,
11231                                                     /*isTargetGA=*/false,
11232                                                     GA->getTargetFlags()),
11233                                SDValue(),
11234                                GA->getOffset(),
11235                                IsIndexSignExt);
11236       }
11237 
11238     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
11239     // instruction, then it could be just the BASE or everything else we don't
11240     // know how to handle. Just use Ptr as BASE and give up.
11241     if (Ptr->getOpcode() != ISD::ADD)
11242       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11243 
11244     // We know that we have at least an ADD instruction. Try to pattern match
11245     // the simple case of BASE + OFFSET.
11246     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
11247       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
11248       return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
11249                               IsIndexSignExt);
11250     }
11251 
11252     // Inside a loop the current BASE pointer is calculated using an ADD and a
11253     // MUL instruction. In this case Ptr is the actual BASE pointer.
11254     // (i64 add (i64 %array_ptr)
11255     //          (i64 mul (i64 %induction_var)
11256     //                   (i64 %element_size)))
11257     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
11258       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11259 
11260     // Look at Base + Index + Offset cases.
11261     SDValue Base = Ptr->getOperand(0);
11262     SDValue IndexOffset = Ptr->getOperand(1);
11263 
11264     // Skip signextends.
11265     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
11266       IndexOffset = IndexOffset->getOperand(0);
11267       IsIndexSignExt = true;
11268     }
11269 
11270     // Either the case of Base + Index (no offset) or something else.
11271     if (IndexOffset->getOpcode() != ISD::ADD)
11272       return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
11273 
11274     // Now we have the case of Base + Index + offset.
11275     SDValue Index = IndexOffset->getOperand(0);
11276     SDValue Offset = IndexOffset->getOperand(1);
11277 
11278     if (!isa<ConstantSDNode>(Offset))
11279       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11280 
11281     // Ignore signextends.
11282     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
11283       Index = Index->getOperand(0);
11284       IsIndexSignExt = true;
11285     } else IsIndexSignExt = false;
11286 
11287     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
11288     return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
11289   }
11290 };
11291 } // namespace
11292 
11293 // This is a helper function for visitMUL to check the profitability
11294 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
11295 // MulNode is the original multiply, AddNode is (add x, c1),
11296 // and ConstNode is c2.
11297 //
11298 // If the (add x, c1) has multiple uses, we could increase
11299 // the number of adds if we make this transformation.
11300 // It would only be worth doing this if we can remove a
11301 // multiply in the process. Check for that here.
11302 // To illustrate:
11303 //     (A + c1) * c3
11304 //     (A + c2) * c3
11305 // We're checking for cases where we have common "c3 * A" expressions.
11306 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
11307                                               SDValue &AddNode,
11308                                               SDValue &ConstNode) {
11309   APInt Val;
11310 
11311   // If the add only has one use, this would be OK to do.
11312   if (AddNode.getNode()->hasOneUse())
11313     return true;
11314 
11315   // Walk all the users of the constant with which we're multiplying.
11316   for (SDNode *Use : ConstNode->uses()) {
11317 
11318     if (Use == MulNode) // This use is the one we're on right now. Skip it.
11319       continue;
11320 
11321     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
11322       SDNode *OtherOp;
11323       SDNode *MulVar = AddNode.getOperand(0).getNode();
11324 
11325       // OtherOp is what we're multiplying against the constant.
11326       if (Use->getOperand(0) == ConstNode)
11327         OtherOp = Use->getOperand(1).getNode();
11328       else
11329         OtherOp = Use->getOperand(0).getNode();
11330 
11331       // Check to see if multiply is with the same operand of our "add".
11332       //
11333       //     ConstNode  = CONST
11334       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
11335       //     ...
11336       //     AddNode  = (A + c1)  <-- MulVar is A.
11337       //         = AddNode * ConstNode   <-- current visiting instruction.
11338       //
11339       // If we make this transformation, we will have a common
11340       // multiply (ConstNode * A) that we can save.
11341       if (OtherOp == MulVar)
11342         return true;
11343 
11344       // Now check to see if a future expansion will give us a common
11345       // multiply.
11346       //
11347       //     ConstNode  = CONST
11348       //     AddNode    = (A + c1)
11349       //     ...   = AddNode * ConstNode <-- current visiting instruction.
11350       //     ...
11351       //     OtherOp = (A + c2)
11352       //     Use     = OtherOp * ConstNode <-- visiting Use.
11353       //
11354       // If we make this transformation, we will have a common
11355       // multiply (CONST * A) after we also do the same transformation
11356       // to the "t2" instruction.
11357       if (OtherOp->getOpcode() == ISD::ADD &&
11358           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
11359           OtherOp->getOperand(0).getNode() == MulVar)
11360         return true;
11361     }
11362   }
11363 
11364   // Didn't find a case where this would be profitable.
11365   return false;
11366 }
11367 
11368 SDValue DAGCombiner::getMergedConstantVectorStore(
11369     SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
11370     SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
11371   SmallVector<SDValue, 8> BuildVector;
11372 
11373   for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
11374     StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
11375     Chains.push_back(St->getChain());
11376     BuildVector.push_back(St->getValue());
11377   }
11378 
11379   return DAG.getBuildVector(Ty, SL, BuildVector);
11380 }
11381 
11382 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
11383                   SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
11384                   unsigned NumStores, bool IsConstantSrc, bool UseVector) {
11385   // Make sure we have something to merge.
11386   if (NumStores < 2)
11387     return false;
11388 
11389   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11390   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11391   unsigned LatestNodeUsed = 0;
11392 
11393   for (unsigned i=0; i < NumStores; ++i) {
11394     // Find a chain for the new wide-store operand. Notice that some
11395     // of the store nodes that we found may not be selected for inclusion
11396     // in the wide store. The chain we use needs to be the chain of the
11397     // latest store node which is *used* and replaced by the wide store.
11398     if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11399       LatestNodeUsed = i;
11400   }
11401 
11402   SmallVector<SDValue, 8> Chains;
11403 
11404   // The latest Node in the DAG.
11405   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
11406   SDLoc DL(StoreNodes[0].MemNode);
11407 
11408   SDValue StoredVal;
11409   if (UseVector) {
11410     bool IsVec = MemVT.isVector();
11411     unsigned Elts = NumStores;
11412     if (IsVec) {
11413       // When merging vector stores, get the total number of elements.
11414       Elts *= MemVT.getVectorNumElements();
11415     }
11416     // Get the type for the merged vector store.
11417     EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
11418     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
11419 
11420     if (IsConstantSrc) {
11421       StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
11422     } else {
11423       SmallVector<SDValue, 8> Ops;
11424       for (unsigned i = 0; i < NumStores; ++i) {
11425         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11426         SDValue Val = St->getValue();
11427         // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
11428         if (Val.getValueType() != MemVT)
11429           return false;
11430         Ops.push_back(Val);
11431         Chains.push_back(St->getChain());
11432       }
11433 
11434       // Build the extracted vector elements back into a vector.
11435       StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
11436                               DL, Ty, Ops);    }
11437   } else {
11438     // We should always use a vector store when merging extracted vector
11439     // elements, so this path implies a store of constants.
11440     assert(IsConstantSrc && "Merged vector elements should use vector store");
11441 
11442     unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
11443     APInt StoreInt(SizeInBits, 0);
11444 
11445     // Construct a single integer constant which is made of the smaller
11446     // constant inputs.
11447     bool IsLE = DAG.getDataLayout().isLittleEndian();
11448     for (unsigned i = 0; i < NumStores; ++i) {
11449       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
11450       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
11451       Chains.push_back(St->getChain());
11452 
11453       SDValue Val = St->getValue();
11454       StoreInt <<= ElementSizeBytes * 8;
11455       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
11456         StoreInt |= C->getAPIntValue().zext(SizeInBits);
11457       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
11458         StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
11459       } else {
11460         llvm_unreachable("Invalid constant element type");
11461       }
11462     }
11463 
11464     // Create the new Load and Store operations.
11465     EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
11466     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
11467   }
11468 
11469   assert(!Chains.empty());
11470 
11471   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11472   SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
11473                                   FirstInChain->getBasePtr(),
11474                                   FirstInChain->getPointerInfo(),
11475                                   FirstInChain->getAlignment());
11476 
11477   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11478                                                   : DAG.getSubtarget().useAA();
11479   if (UseAA) {
11480     // Replace all merged stores with the new store.
11481     for (unsigned i = 0; i < NumStores; ++i)
11482       CombineTo(StoreNodes[i].MemNode, NewStore);
11483   } else {
11484     // Replace the last store with the new store.
11485     CombineTo(LatestOp, NewStore);
11486     // Erase all other stores.
11487     for (unsigned i = 0; i < NumStores; ++i) {
11488       if (StoreNodes[i].MemNode == LatestOp)
11489         continue;
11490       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11491       // ReplaceAllUsesWith will replace all uses that existed when it was
11492       // called, but graph optimizations may cause new ones to appear. For
11493       // example, the case in pr14333 looks like
11494       //
11495       //  St's chain -> St -> another store -> X
11496       //
11497       // And the only difference from St to the other store is the chain.
11498       // When we change it's chain to be St's chain they become identical,
11499       // get CSEed and the net result is that X is now a use of St.
11500       // Since we know that St is redundant, just iterate.
11501       while (!St->use_empty())
11502         DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
11503       deleteAndRecombine(St);
11504     }
11505   }
11506 
11507   return true;
11508 }
11509 
11510 void DAGCombiner::getStoreMergeAndAliasCandidates(
11511     StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
11512     SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
11513   // This holds the base pointer, index, and the offset in bytes from the base
11514   // pointer.
11515   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
11516 
11517   // We must have a base and an offset.
11518   if (!BasePtr.Base.getNode())
11519     return;
11520 
11521   // Do not handle stores to undef base pointers.
11522   if (BasePtr.Base.isUndef())
11523     return;
11524 
11525   // Walk up the chain and look for nodes with offsets from the same
11526   // base pointer. Stop when reaching an instruction with a different kind
11527   // or instruction which has a different base pointer.
11528   EVT MemVT = St->getMemoryVT();
11529   unsigned Seq = 0;
11530   StoreSDNode *Index = St;
11531 
11532 
11533   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11534                                                   : DAG.getSubtarget().useAA();
11535 
11536   if (UseAA) {
11537     // Look at other users of the same chain. Stores on the same chain do not
11538     // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
11539     // to be on the same chain, so don't bother looking at adjacent chains.
11540 
11541     SDValue Chain = St->getChain();
11542     for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
11543       if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
11544         if (I.getOperandNo() != 0)
11545           continue;
11546 
11547         if (OtherST->isVolatile() || OtherST->isIndexed())
11548           continue;
11549 
11550         if (OtherST->getMemoryVT() != MemVT)
11551           continue;
11552 
11553         BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
11554 
11555         if (Ptr.equalBaseIndex(BasePtr))
11556           StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
11557       }
11558     }
11559 
11560     return;
11561   }
11562 
11563   while (Index) {
11564     // If the chain has more than one use, then we can't reorder the mem ops.
11565     if (Index != St && !SDValue(Index, 0)->hasOneUse())
11566       break;
11567 
11568     // Find the base pointer and offset for this memory node.
11569     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
11570 
11571     // Check that the base pointer is the same as the original one.
11572     if (!Ptr.equalBaseIndex(BasePtr))
11573       break;
11574 
11575     // The memory operands must not be volatile.
11576     if (Index->isVolatile() || Index->isIndexed())
11577       break;
11578 
11579     // No truncation.
11580     if (Index->isTruncatingStore())
11581       break;
11582 
11583     // The stored memory type must be the same.
11584     if (Index->getMemoryVT() != MemVT)
11585       break;
11586 
11587     // We do not allow under-aligned stores in order to prevent
11588     // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
11589     // be irrelevant here; what MATTERS is that we not move memory
11590     // operations that potentially overlap past each-other.
11591     if (Index->getAlignment() < MemVT.getStoreSize())
11592       break;
11593 
11594     // We found a potential memory operand to merge.
11595     StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
11596 
11597     // Find the next memory operand in the chain. If the next operand in the
11598     // chain is a store then move up and continue the scan with the next
11599     // memory operand. If the next operand is a load save it and use alias
11600     // information to check if it interferes with anything.
11601     SDNode *NextInChain = Index->getChain().getNode();
11602     while (1) {
11603       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
11604         // We found a store node. Use it for the next iteration.
11605         Index = STn;
11606         break;
11607       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
11608         if (Ldn->isVolatile()) {
11609           Index = nullptr;
11610           break;
11611         }
11612 
11613         // Save the load node for later. Continue the scan.
11614         AliasLoadNodes.push_back(Ldn);
11615         NextInChain = Ldn->getChain().getNode();
11616         continue;
11617       } else {
11618         Index = nullptr;
11619         break;
11620       }
11621     }
11622   }
11623 }
11624 
11625 // We need to check that merging these stores does not cause a loop
11626 // in the DAG. Any store candidate may depend on another candidate
11627 // indirectly through its operand (we already consider dependencies
11628 // through the chain). Check in parallel by searching up from
11629 // non-chain operands of candidates.
11630 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
11631     SmallVectorImpl<MemOpLink> &StoreNodes) {
11632   SmallPtrSet<const SDNode *, 16> Visited;
11633   SmallVector<const SDNode *, 8> Worklist;
11634   // search ops of store candidates
11635   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11636     SDNode *n = StoreNodes[i].MemNode;
11637     // Potential loops may happen only through non-chain operands
11638     for (unsigned j = 1; j < n->getNumOperands(); ++j)
11639       Worklist.push_back(n->getOperand(j).getNode());
11640   }
11641   // search through DAG. We can stop early if we find a storenode
11642   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11643     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
11644       return false;
11645   }
11646   return true;
11647 }
11648 
11649 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
11650   if (OptLevel == CodeGenOpt::None)
11651     return false;
11652 
11653   EVT MemVT = St->getMemoryVT();
11654   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11655   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
11656       Attribute::NoImplicitFloat);
11657 
11658   // This function cannot currently deal with non-byte-sized memory sizes.
11659   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
11660     return false;
11661 
11662   if (!MemVT.isSimple())
11663     return false;
11664 
11665   // Perform an early exit check. Do not bother looking at stored values that
11666   // are not constants, loads, or extracted vector elements.
11667   SDValue StoredVal = St->getValue();
11668   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
11669   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
11670                        isa<ConstantFPSDNode>(StoredVal);
11671   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
11672                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
11673 
11674   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
11675     return false;
11676 
11677   // Don't merge vectors into wider vectors if the source data comes from loads.
11678   // TODO: This restriction can be lifted by using logic similar to the
11679   // ExtractVecSrc case.
11680   if (MemVT.isVector() && IsLoadSrc)
11681     return false;
11682 
11683   // Only look at ends of store sequences.
11684   SDValue Chain = SDValue(St, 0);
11685   if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
11686     return false;
11687 
11688   // Save the LoadSDNodes that we find in the chain.
11689   // We need to make sure that these nodes do not interfere with
11690   // any of the store nodes.
11691   SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
11692 
11693   // Save the StoreSDNodes that we find in the chain.
11694   SmallVector<MemOpLink, 8> StoreNodes;
11695 
11696   getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
11697 
11698   // Check if there is anything to merge.
11699   if (StoreNodes.size() < 2)
11700     return false;
11701 
11702   // only do dependence check in AA case
11703   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11704                                                   : DAG.getSubtarget().useAA();
11705   if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
11706     return false;
11707 
11708   // Sort the memory operands according to their distance from the
11709   // base pointer.  As a secondary criteria: make sure stores coming
11710   // later in the code come first in the list. This is important for
11711   // the non-UseAA case, because we're merging stores into the FINAL
11712   // store along a chain which potentially contains aliasing stores.
11713   // Thus, if there are multiple stores to the same address, the last
11714   // one can be considered for merging but not the others.
11715   std::sort(StoreNodes.begin(), StoreNodes.end(),
11716             [](MemOpLink LHS, MemOpLink RHS) {
11717     return LHS.OffsetFromBase < RHS.OffsetFromBase ||
11718            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
11719             LHS.SequenceNum < RHS.SequenceNum);
11720   });
11721 
11722   // Scan the memory operations on the chain and find the first non-consecutive
11723   // store memory address.
11724   unsigned LastConsecutiveStore = 0;
11725   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
11726   for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
11727 
11728     // Check that the addresses are consecutive starting from the second
11729     // element in the list of stores.
11730     if (i > 0) {
11731       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
11732       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11733         break;
11734     }
11735 
11736     // Check if this store interferes with any of the loads that we found.
11737     // If we find a load that alias with this store. Stop the sequence.
11738     if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
11739           return isAlias(Ldn, StoreNodes[i].MemNode);
11740         }))
11741       break;
11742 
11743     // Mark this node as useful.
11744     LastConsecutiveStore = i;
11745   }
11746 
11747   // The node with the lowest store address.
11748   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11749   unsigned FirstStoreAS = FirstInChain->getAddressSpace();
11750   unsigned FirstStoreAlign = FirstInChain->getAlignment();
11751   LLVMContext &Context = *DAG.getContext();
11752   const DataLayout &DL = DAG.getDataLayout();
11753 
11754   // Store the constants into memory as one consecutive store.
11755   if (IsConstantSrc) {
11756     unsigned LastLegalType = 0;
11757     unsigned LastLegalVectorType = 0;
11758     bool NonZero = false;
11759     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11760       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11761       SDValue StoredVal = St->getValue();
11762 
11763       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
11764         NonZero |= !C->isNullValue();
11765       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
11766         NonZero |= !C->getConstantFPValue()->isNullValue();
11767       } else {
11768         // Non-constant.
11769         break;
11770       }
11771 
11772       // Find a legal type for the constant store.
11773       unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11774       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11775       bool IsFast;
11776       if (TLI.isTypeLegal(StoreTy) &&
11777           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11778                                  FirstStoreAlign, &IsFast) && IsFast) {
11779         LastLegalType = i+1;
11780       // Or check whether a truncstore is legal.
11781       } else if (TLI.getTypeAction(Context, StoreTy) ==
11782                  TargetLowering::TypePromoteInteger) {
11783         EVT LegalizedStoredValueTy =
11784           TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
11785         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11786             TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11787                                    FirstStoreAS, FirstStoreAlign, &IsFast) &&
11788             IsFast) {
11789           LastLegalType = i + 1;
11790         }
11791       }
11792 
11793       // We only use vectors if the constant is known to be zero or the target
11794       // allows it and the function is not marked with the noimplicitfloat
11795       // attribute.
11796       if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
11797                                                         FirstStoreAS)) &&
11798           !NoVectors) {
11799         // Find a legal type for the vector store.
11800         EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
11801         if (TLI.isTypeLegal(Ty) &&
11802             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11803                                    FirstStoreAlign, &IsFast) && IsFast)
11804           LastLegalVectorType = i + 1;
11805       }
11806     }
11807 
11808     // Check if we found a legal integer type to store.
11809     if (LastLegalType == 0 && LastLegalVectorType == 0)
11810       return false;
11811 
11812     bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
11813     unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
11814 
11815     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
11816                                            true, UseVector);
11817   }
11818 
11819   // When extracting multiple vector elements, try to store them
11820   // in one vector store rather than a sequence of scalar stores.
11821   if (IsExtractVecSrc) {
11822     unsigned NumStoresToMerge = 0;
11823     bool IsVec = MemVT.isVector();
11824     for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
11825       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11826       unsigned StoreValOpcode = St->getValue().getOpcode();
11827       // This restriction could be loosened.
11828       // Bail out if any stored values are not elements extracted from a vector.
11829       // It should be possible to handle mixed sources, but load sources need
11830       // more careful handling (see the block of code below that handles
11831       // consecutive loads).
11832       if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
11833           StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
11834         return false;
11835 
11836       // Find a legal type for the vector store.
11837       unsigned Elts = i + 1;
11838       if (IsVec) {
11839         // When merging vector stores, get the total number of elements.
11840         Elts *= MemVT.getVectorNumElements();
11841       }
11842       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
11843       bool IsFast;
11844       if (TLI.isTypeLegal(Ty) &&
11845           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11846                                  FirstStoreAlign, &IsFast) && IsFast)
11847         NumStoresToMerge = i + 1;
11848     }
11849 
11850     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
11851                                            false, true);
11852   }
11853 
11854   // Below we handle the case of multiple consecutive stores that
11855   // come from multiple consecutive loads. We merge them into a single
11856   // wide load and a single wide store.
11857 
11858   // Look for load nodes which are used by the stored values.
11859   SmallVector<MemOpLink, 8> LoadNodes;
11860 
11861   // Find acceptable loads. Loads need to have the same chain (token factor),
11862   // must not be zext, volatile, indexed, and they must be consecutive.
11863   BaseIndexOffset LdBasePtr;
11864   for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11865     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11866     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
11867     if (!Ld) break;
11868 
11869     // Loads must only have one use.
11870     if (!Ld->hasNUsesOfValue(1, 0))
11871       break;
11872 
11873     // The memory operands must not be volatile.
11874     if (Ld->isVolatile() || Ld->isIndexed())
11875       break;
11876 
11877     // We do not accept ext loads.
11878     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
11879       break;
11880 
11881     // The stored memory type must be the same.
11882     if (Ld->getMemoryVT() != MemVT)
11883       break;
11884 
11885     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
11886     // If this is not the first ptr that we check.
11887     if (LdBasePtr.Base.getNode()) {
11888       // The base ptr must be the same.
11889       if (!LdPtr.equalBaseIndex(LdBasePtr))
11890         break;
11891     } else {
11892       // Check that all other base pointers are the same as this one.
11893       LdBasePtr = LdPtr;
11894     }
11895 
11896     // We found a potential memory operand to merge.
11897     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
11898   }
11899 
11900   if (LoadNodes.size() < 2)
11901     return false;
11902 
11903   // If we have load/store pair instructions and we only have two values,
11904   // don't bother.
11905   unsigned RequiredAlignment;
11906   if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
11907       St->getAlignment() >= RequiredAlignment)
11908     return false;
11909 
11910   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
11911   unsigned FirstLoadAS = FirstLoad->getAddressSpace();
11912   unsigned FirstLoadAlign = FirstLoad->getAlignment();
11913 
11914   // Scan the memory operations on the chain and find the first non-consecutive
11915   // load memory address. These variables hold the index in the store node
11916   // array.
11917   unsigned LastConsecutiveLoad = 0;
11918   // This variable refers to the size and not index in the array.
11919   unsigned LastLegalVectorType = 0;
11920   unsigned LastLegalIntegerType = 0;
11921   StartAddress = LoadNodes[0].OffsetFromBase;
11922   SDValue FirstChain = FirstLoad->getChain();
11923   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
11924     // All loads must share the same chain.
11925     if (LoadNodes[i].MemNode->getChain() != FirstChain)
11926       break;
11927 
11928     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
11929     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11930       break;
11931     LastConsecutiveLoad = i;
11932     // Find a legal type for the vector store.
11933     EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
11934     bool IsFastSt, IsFastLd;
11935     if (TLI.isTypeLegal(StoreTy) &&
11936         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11937                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11938         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11939                                FirstLoadAlign, &IsFastLd) && IsFastLd) {
11940       LastLegalVectorType = i + 1;
11941     }
11942 
11943     // Find a legal type for the integer store.
11944     unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11945     StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11946     if (TLI.isTypeLegal(StoreTy) &&
11947         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11948                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11949         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11950                                FirstLoadAlign, &IsFastLd) && IsFastLd)
11951       LastLegalIntegerType = i + 1;
11952     // Or check whether a truncstore and extload is legal.
11953     else if (TLI.getTypeAction(Context, StoreTy) ==
11954              TargetLowering::TypePromoteInteger) {
11955       EVT LegalizedStoredValueTy =
11956         TLI.getTypeToTransformTo(Context, StoreTy);
11957       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11958           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11959           TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11960           TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11961           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11962                                  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
11963           IsFastSt &&
11964           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11965                                  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
11966           IsFastLd)
11967         LastLegalIntegerType = i+1;
11968     }
11969   }
11970 
11971   // Only use vector types if the vector type is larger than the integer type.
11972   // If they are the same, use integers.
11973   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
11974   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
11975 
11976   // We add +1 here because the LastXXX variables refer to location while
11977   // the NumElem refers to array/index size.
11978   unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
11979   NumElem = std::min(LastLegalType, NumElem);
11980 
11981   if (NumElem < 2)
11982     return false;
11983 
11984   // Collect the chains from all merged stores.
11985   SmallVector<SDValue, 8> MergeStoreChains;
11986   MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
11987 
11988   // The latest Node in the DAG.
11989   unsigned LatestNodeUsed = 0;
11990   for (unsigned i=1; i<NumElem; ++i) {
11991     // Find a chain for the new wide-store operand. Notice that some
11992     // of the store nodes that we found may not be selected for inclusion
11993     // in the wide store. The chain we use needs to be the chain of the
11994     // latest store node which is *used* and replaced by the wide store.
11995     if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11996       LatestNodeUsed = i;
11997 
11998     MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
11999   }
12000 
12001   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
12002 
12003   // Find if it is better to use vectors or integers to load and store
12004   // to memory.
12005   EVT JointMemOpVT;
12006   if (UseVectorTy) {
12007     JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
12008   } else {
12009     unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
12010     JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
12011   }
12012 
12013   SDLoc LoadDL(LoadNodes[0].MemNode);
12014   SDLoc StoreDL(StoreNodes[0].MemNode);
12015 
12016   // The merged loads are required to have the same incoming chain, so
12017   // using the first's chain is acceptable.
12018   SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
12019                                 FirstLoad->getBasePtr(),
12020                                 FirstLoad->getPointerInfo(), FirstLoadAlign);
12021 
12022   SDValue NewStoreChain =
12023     DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
12024 
12025   SDValue NewStore =
12026       DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
12027                    FirstInChain->getPointerInfo(), FirstStoreAlign);
12028 
12029   // Transfer chain users from old loads to the new load.
12030   for (unsigned i = 0; i < NumElem; ++i) {
12031     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
12032     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
12033                                   SDValue(NewLoad.getNode(), 1));
12034   }
12035 
12036   if (UseAA) {
12037     // Replace the all stores with the new store.
12038     for (unsigned i = 0; i < NumElem; ++i)
12039       CombineTo(StoreNodes[i].MemNode, NewStore);
12040   } else {
12041     // Replace the last store with the new store.
12042     CombineTo(LatestOp, NewStore);
12043     // Erase all other stores.
12044     for (unsigned i = 0; i < NumElem; ++i) {
12045       // Remove all Store nodes.
12046       if (StoreNodes[i].MemNode == LatestOp)
12047         continue;
12048       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12049       DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
12050       deleteAndRecombine(St);
12051     }
12052   }
12053 
12054   return true;
12055 }
12056 
12057 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
12058   SDLoc SL(ST);
12059   SDValue ReplStore;
12060 
12061   // Replace the chain to avoid dependency.
12062   if (ST->isTruncatingStore()) {
12063     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
12064                                   ST->getBasePtr(), ST->getMemoryVT(),
12065                                   ST->getMemOperand());
12066   } else {
12067     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
12068                              ST->getMemOperand());
12069   }
12070 
12071   // Create token to keep both nodes around.
12072   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
12073                               MVT::Other, ST->getChain(), ReplStore);
12074 
12075   // Make sure the new and old chains are cleaned up.
12076   AddToWorklist(Token.getNode());
12077 
12078   // Don't add users to work list.
12079   return CombineTo(ST, Token, false);
12080 }
12081 
12082 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
12083   SDValue Value = ST->getValue();
12084   if (Value.getOpcode() == ISD::TargetConstantFP)
12085     return SDValue();
12086 
12087   SDLoc DL(ST);
12088 
12089   SDValue Chain = ST->getChain();
12090   SDValue Ptr = ST->getBasePtr();
12091 
12092   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
12093 
12094   // NOTE: If the original store is volatile, this transform must not increase
12095   // the number of stores.  For example, on x86-32 an f64 can be stored in one
12096   // processor operation but an i64 (which is not legal) requires two.  So the
12097   // transform should not be done in this case.
12098 
12099   SDValue Tmp;
12100   switch (CFP->getSimpleValueType(0).SimpleTy) {
12101   default:
12102     llvm_unreachable("Unknown FP type");
12103   case MVT::f16:    // We don't do this for these yet.
12104   case MVT::f80:
12105   case MVT::f128:
12106   case MVT::ppcf128:
12107     return SDValue();
12108   case MVT::f32:
12109     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
12110         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12111       ;
12112       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
12113                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
12114                             MVT::i32);
12115       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
12116     }
12117 
12118     return SDValue();
12119   case MVT::f64:
12120     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
12121          !ST->isVolatile()) ||
12122         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
12123       ;
12124       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
12125                             getZExtValue(), SDLoc(CFP), MVT::i64);
12126       return DAG.getStore(Chain, DL, Tmp,
12127                           Ptr, ST->getMemOperand());
12128     }
12129 
12130     if (!ST->isVolatile() &&
12131         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12132       // Many FP stores are not made apparent until after legalize, e.g. for
12133       // argument passing.  Since this is so common, custom legalize the
12134       // 64-bit integer store into two 32-bit stores.
12135       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
12136       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
12137       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
12138       if (DAG.getDataLayout().isBigEndian())
12139         std::swap(Lo, Hi);
12140 
12141       unsigned Alignment = ST->getAlignment();
12142       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12143       AAMDNodes AAInfo = ST->getAAInfo();
12144 
12145       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12146                                  ST->getAlignment(), MMOFlags, AAInfo);
12147       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12148                         DAG.getConstant(4, DL, Ptr.getValueType()));
12149       Alignment = MinAlign(Alignment, 4U);
12150       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
12151                                  ST->getPointerInfo().getWithOffset(4),
12152                                  Alignment, MMOFlags, AAInfo);
12153       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12154                          St0, St1);
12155     }
12156 
12157     return SDValue();
12158   }
12159 }
12160 
12161 SDValue DAGCombiner::visitSTORE(SDNode *N) {
12162   StoreSDNode *ST  = cast<StoreSDNode>(N);
12163   SDValue Chain = ST->getChain();
12164   SDValue Value = ST->getValue();
12165   SDValue Ptr   = ST->getBasePtr();
12166 
12167   // If this is a store of a bit convert, store the input value if the
12168   // resultant store does not need a higher alignment than the original.
12169   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
12170       ST->isUnindexed()) {
12171     EVT SVT = Value.getOperand(0).getValueType();
12172     if (((!LegalOperations && !ST->isVolatile()) ||
12173          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
12174         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
12175       unsigned OrigAlign = ST->getAlignment();
12176       bool Fast = false;
12177       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
12178                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
12179           Fast) {
12180         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12181                             ST->getPointerInfo(), OrigAlign,
12182                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
12183       }
12184     }
12185   }
12186 
12187   // Turn 'store undef, Ptr' -> nothing.
12188   if (Value.isUndef() && ST->isUnindexed())
12189     return Chain;
12190 
12191   // Try to infer better alignment information than the store already has.
12192   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
12193     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12194       if (Align > ST->getAlignment()) {
12195         SDValue NewStore =
12196             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
12197                               ST->getMemoryVT(), Align,
12198                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
12199         if (NewStore.getNode() != N)
12200           return CombineTo(ST, NewStore, true);
12201       }
12202     }
12203   }
12204 
12205   // Try transforming a pair floating point load / store ops to integer
12206   // load / store ops.
12207   if (SDValue NewST = TransformFPLoadStorePair(N))
12208     return NewST;
12209 
12210   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
12211                                                   : DAG.getSubtarget().useAA();
12212 #ifndef NDEBUG
12213   if (CombinerAAOnlyFunc.getNumOccurrences() &&
12214       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
12215     UseAA = false;
12216 #endif
12217   if (UseAA && ST->isUnindexed()) {
12218     // FIXME: We should do this even without AA enabled. AA will just allow
12219     // FindBetterChain to work in more situations. The problem with this is that
12220     // any combine that expects memory operations to be on consecutive chains
12221     // first needs to be updated to look for users of the same chain.
12222 
12223     // Walk up chain skipping non-aliasing memory nodes, on this store and any
12224     // adjacent stores.
12225     if (findBetterNeighborChains(ST)) {
12226       // replaceStoreChain uses CombineTo, which handled all of the worklist
12227       // manipulation. Return the original node to not do anything else.
12228       return SDValue(ST, 0);
12229     }
12230     Chain = ST->getChain();
12231   }
12232 
12233   // Try transforming N to an indexed store.
12234   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12235     return SDValue(N, 0);
12236 
12237   // FIXME: is there such a thing as a truncating indexed store?
12238   if (ST->isTruncatingStore() && ST->isUnindexed() &&
12239       Value.getValueType().isInteger()) {
12240     // See if we can simplify the input to this truncstore with knowledge that
12241     // only the low bits are being used.  For example:
12242     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
12243     SDValue Shorter = GetDemandedBits(
12244         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12245                                     ST->getMemoryVT().getScalarSizeInBits()));
12246     AddToWorklist(Value.getNode());
12247     if (Shorter.getNode())
12248       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
12249                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
12250 
12251     // Otherwise, see if we can simplify the operation with
12252     // SimplifyDemandedBits, which only works if the value has a single use.
12253     if (SimplifyDemandedBits(
12254             Value,
12255             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12256                                  ST->getMemoryVT().getScalarSizeInBits())))
12257       return SDValue(N, 0);
12258   }
12259 
12260   // If this is a load followed by a store to the same location, then the store
12261   // is dead/noop.
12262   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
12263     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
12264         ST->isUnindexed() && !ST->isVolatile() &&
12265         // There can't be any side effects between the load and store, such as
12266         // a call or store.
12267         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
12268       // The store is dead, remove it.
12269       return Chain;
12270     }
12271   }
12272 
12273   // If this is a store followed by a store with the same value to the same
12274   // location, then the store is dead/noop.
12275   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
12276     if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
12277         ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
12278         ST1->isUnindexed() && !ST1->isVolatile()) {
12279       // The store is dead, remove it.
12280       return Chain;
12281     }
12282   }
12283 
12284   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
12285   // truncating store.  We can do this even if this is already a truncstore.
12286   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
12287       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
12288       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
12289                             ST->getMemoryVT())) {
12290     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
12291                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
12292   }
12293 
12294   // Only perform this optimization before the types are legal, because we
12295   // don't want to perform this optimization on every DAGCombine invocation.
12296   if (!LegalTypes) {
12297     bool EverChanged = false;
12298 
12299     do {
12300       // There can be multiple store sequences on the same chain.
12301       // Keep trying to merge store sequences until we are unable to do so
12302       // or until we merge the last store on the chain.
12303       bool Changed = MergeConsecutiveStores(ST);
12304       EverChanged |= Changed;
12305       if (!Changed) break;
12306     } while (ST->getOpcode() != ISD::DELETED_NODE);
12307 
12308     if (EverChanged)
12309       return SDValue(N, 0);
12310   }
12311 
12312   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
12313   //
12314   // Make sure to do this only after attempting to merge stores in order to
12315   //  avoid changing the types of some subset of stores due to visit order,
12316   //  preventing their merging.
12317   if (isa<ConstantFPSDNode>(Value)) {
12318     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
12319       return NewSt;
12320   }
12321 
12322   if (SDValue NewSt = splitMergedValStore(ST))
12323     return NewSt;
12324 
12325   return ReduceLoadOpStoreWidth(N);
12326 }
12327 
12328 /// For the instruction sequence of store below, F and I values
12329 /// are bundled together as an i64 value before being stored into memory.
12330 /// Sometimes it is more efficent to generate separate stores for F and I,
12331 /// which can remove the bitwise instructions or sink them to colder places.
12332 ///
12333 ///   (store (or (zext (bitcast F to i32) to i64),
12334 ///              (shl (zext I to i64), 32)), addr)  -->
12335 ///   (store F, addr) and (store I, addr+4)
12336 ///
12337 /// Similarly, splitting for other merged store can also be beneficial, like:
12338 /// For pair of {i32, i32}, i64 store --> two i32 stores.
12339 /// For pair of {i32, i16}, i64 store --> two i32 stores.
12340 /// For pair of {i16, i16}, i32 store --> two i16 stores.
12341 /// For pair of {i16, i8},  i32 store --> two i16 stores.
12342 /// For pair of {i8, i8},   i16 store --> two i8 stores.
12343 ///
12344 /// We allow each target to determine specifically which kind of splitting is
12345 /// supported.
12346 ///
12347 /// The store patterns are commonly seen from the simple code snippet below
12348 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
12349 ///   void goo(const std::pair<int, float> &);
12350 ///   hoo() {
12351 ///     ...
12352 ///     goo(std::make_pair(tmp, ftmp));
12353 ///     ...
12354 ///   }
12355 ///
12356 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
12357   if (OptLevel == CodeGenOpt::None)
12358     return SDValue();
12359 
12360   SDValue Val = ST->getValue();
12361   SDLoc DL(ST);
12362 
12363   // Match OR operand.
12364   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
12365     return SDValue();
12366 
12367   // Match SHL operand and get Lower and Higher parts of Val.
12368   SDValue Op1 = Val.getOperand(0);
12369   SDValue Op2 = Val.getOperand(1);
12370   SDValue Lo, Hi;
12371   if (Op1.getOpcode() != ISD::SHL) {
12372     std::swap(Op1, Op2);
12373     if (Op1.getOpcode() != ISD::SHL)
12374       return SDValue();
12375   }
12376   Lo = Op2;
12377   Hi = Op1.getOperand(0);
12378   if (!Op1.hasOneUse())
12379     return SDValue();
12380 
12381   // Match shift amount to HalfValBitSize.
12382   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
12383   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
12384   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
12385     return SDValue();
12386 
12387   // Lo and Hi are zero-extended from int with size less equal than 32
12388   // to i64.
12389   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
12390       !Lo.getOperand(0).getValueType().isScalarInteger() ||
12391       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
12392       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
12393       !Hi.getOperand(0).getValueType().isScalarInteger() ||
12394       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
12395     return SDValue();
12396 
12397   if (!TLI.isMultiStoresCheaperThanBitsMerge(Lo.getOperand(0),
12398                                              Hi.getOperand(0)))
12399     return SDValue();
12400 
12401   // Start to split store.
12402   unsigned Alignment = ST->getAlignment();
12403   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12404   AAMDNodes AAInfo = ST->getAAInfo();
12405 
12406   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
12407   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
12408   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
12409   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
12410 
12411   SDValue Chain = ST->getChain();
12412   SDValue Ptr = ST->getBasePtr();
12413   // Lower value store.
12414   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12415                              ST->getAlignment(), MMOFlags, AAInfo);
12416   Ptr =
12417       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12418                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
12419   // Higher value store.
12420   SDValue St1 =
12421       DAG.getStore(St0, DL, Hi, Ptr,
12422                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
12423                    Alignment / 2, MMOFlags, AAInfo);
12424   return St1;
12425 }
12426 
12427 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
12428   SDValue InVec = N->getOperand(0);
12429   SDValue InVal = N->getOperand(1);
12430   SDValue EltNo = N->getOperand(2);
12431   SDLoc DL(N);
12432 
12433   // If the inserted element is an UNDEF, just use the input vector.
12434   if (InVal.isUndef())
12435     return InVec;
12436 
12437   EVT VT = InVec.getValueType();
12438 
12439   // If we can't generate a legal BUILD_VECTOR, exit
12440   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
12441     return SDValue();
12442 
12443   // Check that we know which element is being inserted
12444   if (!isa<ConstantSDNode>(EltNo))
12445     return SDValue();
12446   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12447 
12448   // Canonicalize insert_vector_elt dag nodes.
12449   // Example:
12450   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
12451   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
12452   //
12453   // Do this only if the child insert_vector node has one use; also
12454   // do this only if indices are both constants and Idx1 < Idx0.
12455   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
12456       && isa<ConstantSDNode>(InVec.getOperand(2))) {
12457     unsigned OtherElt =
12458       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
12459     if (Elt < OtherElt) {
12460       // Swap nodes.
12461       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
12462                                   InVec.getOperand(0), InVal, EltNo);
12463       AddToWorklist(NewOp.getNode());
12464       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
12465                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
12466     }
12467   }
12468 
12469   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
12470   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
12471   // vector elements.
12472   SmallVector<SDValue, 8> Ops;
12473   // Do not combine these two vectors if the output vector will not replace
12474   // the input vector.
12475   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
12476     Ops.append(InVec.getNode()->op_begin(),
12477                InVec.getNode()->op_end());
12478   } else if (InVec.isUndef()) {
12479     unsigned NElts = VT.getVectorNumElements();
12480     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
12481   } else {
12482     return SDValue();
12483   }
12484 
12485   // Insert the element
12486   if (Elt < Ops.size()) {
12487     // All the operands of BUILD_VECTOR must have the same type;
12488     // we enforce that here.
12489     EVT OpVT = Ops[0].getValueType();
12490     if (InVal.getValueType() != OpVT)
12491       InVal = OpVT.bitsGT(InVal.getValueType()) ?
12492                 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
12493                 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
12494     Ops[Elt] = InVal;
12495   }
12496 
12497   // Return the new vector
12498   return DAG.getBuildVector(VT, DL, Ops);
12499 }
12500 
12501 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
12502     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
12503   assert(!OriginalLoad->isVolatile());
12504 
12505   EVT ResultVT = EVE->getValueType(0);
12506   EVT VecEltVT = InVecVT.getVectorElementType();
12507   unsigned Align = OriginalLoad->getAlignment();
12508   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
12509       VecEltVT.getTypeForEVT(*DAG.getContext()));
12510 
12511   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12512     return SDValue();
12513 
12514   Align = NewAlign;
12515 
12516   SDValue NewPtr = OriginalLoad->getBasePtr();
12517   SDValue Offset;
12518   EVT PtrType = NewPtr.getValueType();
12519   MachinePointerInfo MPI;
12520   SDLoc DL(EVE);
12521   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12522     int Elt = ConstEltNo->getZExtValue();
12523     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
12524     Offset = DAG.getConstant(PtrOff, DL, PtrType);
12525     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
12526   } else {
12527     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
12528     Offset = DAG.getNode(
12529         ISD::MUL, DL, PtrType, Offset,
12530         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
12531     MPI = OriginalLoad->getPointerInfo();
12532   }
12533   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
12534 
12535   // The replacement we need to do here is a little tricky: we need to
12536   // replace an extractelement of a load with a load.
12537   // Use ReplaceAllUsesOfValuesWith to do the replacement.
12538   // Note that this replacement assumes that the extractvalue is the only
12539   // use of the load; that's okay because we don't want to perform this
12540   // transformation in other cases anyway.
12541   SDValue Load;
12542   SDValue Chain;
12543   if (ResultVT.bitsGT(VecEltVT)) {
12544     // If the result type of vextract is wider than the load, then issue an
12545     // extending load instead.
12546     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
12547                                                   VecEltVT)
12548                                    ? ISD::ZEXTLOAD
12549                                    : ISD::EXTLOAD;
12550     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
12551                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
12552                           Align, OriginalLoad->getMemOperand()->getFlags(),
12553                           OriginalLoad->getAAInfo());
12554     Chain = Load.getValue(1);
12555   } else {
12556     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
12557                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
12558                        OriginalLoad->getAAInfo());
12559     Chain = Load.getValue(1);
12560     if (ResultVT.bitsLT(VecEltVT))
12561       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
12562     else
12563       Load = DAG.getBitcast(ResultVT, Load);
12564   }
12565   WorklistRemover DeadNodes(*this);
12566   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
12567   SDValue To[] = { Load, Chain };
12568   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
12569   // Since we're explicitly calling ReplaceAllUses, add the new node to the
12570   // worklist explicitly as well.
12571   AddToWorklist(Load.getNode());
12572   AddUsersToWorklist(Load.getNode()); // Add users too
12573   // Make sure to revisit this node to clean it up; it will usually be dead.
12574   AddToWorklist(EVE);
12575   ++OpsNarrowed;
12576   return SDValue(EVE, 0);
12577 }
12578 
12579 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
12580   // (vextract (scalar_to_vector val, 0) -> val
12581   SDValue InVec = N->getOperand(0);
12582   EVT VT = InVec.getValueType();
12583   EVT NVT = N->getValueType(0);
12584 
12585   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
12586     // Check if the result type doesn't match the inserted element type. A
12587     // SCALAR_TO_VECTOR may truncate the inserted element and the
12588     // EXTRACT_VECTOR_ELT may widen the extracted vector.
12589     SDValue InOp = InVec.getOperand(0);
12590     if (InOp.getValueType() != NVT) {
12591       assert(InOp.getValueType().isInteger() && NVT.isInteger());
12592       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
12593     }
12594     return InOp;
12595   }
12596 
12597   SDValue EltNo = N->getOperand(1);
12598   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
12599 
12600   // extract_vector_elt (build_vector x, y), 1 -> y
12601   if (ConstEltNo &&
12602       InVec.getOpcode() == ISD::BUILD_VECTOR &&
12603       TLI.isTypeLegal(VT) &&
12604       (InVec.hasOneUse() ||
12605        TLI.aggressivelyPreferBuildVectorSources(VT))) {
12606     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
12607     EVT InEltVT = Elt.getValueType();
12608 
12609     // Sometimes build_vector's scalar input types do not match result type.
12610     if (NVT == InEltVT)
12611       return Elt;
12612 
12613     // TODO: It may be useful to truncate if free if the build_vector implicitly
12614     // converts.
12615   }
12616 
12617   // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
12618   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
12619       ConstEltNo->isNullValue() && VT.isInteger()) {
12620     SDValue BCSrc = InVec.getOperand(0);
12621     if (BCSrc.getValueType().isScalarInteger())
12622       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
12623   }
12624 
12625   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
12626   //
12627   // This only really matters if the index is non-constant since other combines
12628   // on the constant elements already work.
12629   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12630       EltNo == InVec.getOperand(2)) {
12631     SDValue Elt = InVec.getOperand(1);
12632     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
12633   }
12634 
12635   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
12636   // We only perform this optimization before the op legalization phase because
12637   // we may introduce new vector instructions which are not backed by TD
12638   // patterns. For example on AVX, extracting elements from a wide vector
12639   // without using extract_subvector. However, if we can find an underlying
12640   // scalar value, then we can always use that.
12641   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
12642     int NumElem = VT.getVectorNumElements();
12643     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
12644     // Find the new index to extract from.
12645     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
12646 
12647     // Extracting an undef index is undef.
12648     if (OrigElt == -1)
12649       return DAG.getUNDEF(NVT);
12650 
12651     // Select the right vector half to extract from.
12652     SDValue SVInVec;
12653     if (OrigElt < NumElem) {
12654       SVInVec = InVec->getOperand(0);
12655     } else {
12656       SVInVec = InVec->getOperand(1);
12657       OrigElt -= NumElem;
12658     }
12659 
12660     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
12661       SDValue InOp = SVInVec.getOperand(OrigElt);
12662       if (InOp.getValueType() != NVT) {
12663         assert(InOp.getValueType().isInteger() && NVT.isInteger());
12664         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
12665       }
12666 
12667       return InOp;
12668     }
12669 
12670     // FIXME: We should handle recursing on other vector shuffles and
12671     // scalar_to_vector here as well.
12672 
12673     if (!LegalOperations) {
12674       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
12675       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
12676                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
12677     }
12678   }
12679 
12680   bool BCNumEltsChanged = false;
12681   EVT ExtVT = VT.getVectorElementType();
12682   EVT LVT = ExtVT;
12683 
12684   // If the result of load has to be truncated, then it's not necessarily
12685   // profitable.
12686   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
12687     return SDValue();
12688 
12689   if (InVec.getOpcode() == ISD::BITCAST) {
12690     // Don't duplicate a load with other uses.
12691     if (!InVec.hasOneUse())
12692       return SDValue();
12693 
12694     EVT BCVT = InVec.getOperand(0).getValueType();
12695     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
12696       return SDValue();
12697     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
12698       BCNumEltsChanged = true;
12699     InVec = InVec.getOperand(0);
12700     ExtVT = BCVT.getVectorElementType();
12701   }
12702 
12703   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
12704   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
12705       ISD::isNormalLoad(InVec.getNode()) &&
12706       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
12707     SDValue Index = N->getOperand(1);
12708     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
12709       if (!OrigLoad->isVolatile()) {
12710         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
12711                                                              OrigLoad);
12712       }
12713     }
12714   }
12715 
12716   // Perform only after legalization to ensure build_vector / vector_shuffle
12717   // optimizations have already been done.
12718   if (!LegalOperations) return SDValue();
12719 
12720   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
12721   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
12722   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
12723 
12724   if (ConstEltNo) {
12725     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12726 
12727     LoadSDNode *LN0 = nullptr;
12728     const ShuffleVectorSDNode *SVN = nullptr;
12729     if (ISD::isNormalLoad(InVec.getNode())) {
12730       LN0 = cast<LoadSDNode>(InVec);
12731     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
12732                InVec.getOperand(0).getValueType() == ExtVT &&
12733                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
12734       // Don't duplicate a load with other uses.
12735       if (!InVec.hasOneUse())
12736         return SDValue();
12737 
12738       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
12739     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
12740       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
12741       // =>
12742       // (load $addr+1*size)
12743 
12744       // Don't duplicate a load with other uses.
12745       if (!InVec.hasOneUse())
12746         return SDValue();
12747 
12748       // If the bit convert changed the number of elements, it is unsafe
12749       // to examine the mask.
12750       if (BCNumEltsChanged)
12751         return SDValue();
12752 
12753       // Select the input vector, guarding against out of range extract vector.
12754       unsigned NumElems = VT.getVectorNumElements();
12755       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
12756       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
12757 
12758       if (InVec.getOpcode() == ISD::BITCAST) {
12759         // Don't duplicate a load with other uses.
12760         if (!InVec.hasOneUse())
12761           return SDValue();
12762 
12763         InVec = InVec.getOperand(0);
12764       }
12765       if (ISD::isNormalLoad(InVec.getNode())) {
12766         LN0 = cast<LoadSDNode>(InVec);
12767         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
12768         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
12769       }
12770     }
12771 
12772     // Make sure we found a non-volatile load and the extractelement is
12773     // the only use.
12774     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
12775       return SDValue();
12776 
12777     // If Idx was -1 above, Elt is going to be -1, so just return undef.
12778     if (Elt == -1)
12779       return DAG.getUNDEF(LVT);
12780 
12781     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
12782   }
12783 
12784   return SDValue();
12785 }
12786 
12787 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
12788 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
12789   // We perform this optimization post type-legalization because
12790   // the type-legalizer often scalarizes integer-promoted vectors.
12791   // Performing this optimization before may create bit-casts which
12792   // will be type-legalized to complex code sequences.
12793   // We perform this optimization only before the operation legalizer because we
12794   // may introduce illegal operations.
12795   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
12796     return SDValue();
12797 
12798   unsigned NumInScalars = N->getNumOperands();
12799   SDLoc DL(N);
12800   EVT VT = N->getValueType(0);
12801 
12802   // Check to see if this is a BUILD_VECTOR of a bunch of values
12803   // which come from any_extend or zero_extend nodes. If so, we can create
12804   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
12805   // optimizations. We do not handle sign-extend because we can't fill the sign
12806   // using shuffles.
12807   EVT SourceType = MVT::Other;
12808   bool AllAnyExt = true;
12809 
12810   for (unsigned i = 0; i != NumInScalars; ++i) {
12811     SDValue In = N->getOperand(i);
12812     // Ignore undef inputs.
12813     if (In.isUndef()) continue;
12814 
12815     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
12816     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
12817 
12818     // Abort if the element is not an extension.
12819     if (!ZeroExt && !AnyExt) {
12820       SourceType = MVT::Other;
12821       break;
12822     }
12823 
12824     // The input is a ZeroExt or AnyExt. Check the original type.
12825     EVT InTy = In.getOperand(0).getValueType();
12826 
12827     // Check that all of the widened source types are the same.
12828     if (SourceType == MVT::Other)
12829       // First time.
12830       SourceType = InTy;
12831     else if (InTy != SourceType) {
12832       // Multiple income types. Abort.
12833       SourceType = MVT::Other;
12834       break;
12835     }
12836 
12837     // Check if all of the extends are ANY_EXTENDs.
12838     AllAnyExt &= AnyExt;
12839   }
12840 
12841   // In order to have valid types, all of the inputs must be extended from the
12842   // same source type and all of the inputs must be any or zero extend.
12843   // Scalar sizes must be a power of two.
12844   EVT OutScalarTy = VT.getScalarType();
12845   bool ValidTypes = SourceType != MVT::Other &&
12846                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
12847                  isPowerOf2_32(SourceType.getSizeInBits());
12848 
12849   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
12850   // turn into a single shuffle instruction.
12851   if (!ValidTypes)
12852     return SDValue();
12853 
12854   bool isLE = DAG.getDataLayout().isLittleEndian();
12855   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
12856   assert(ElemRatio > 1 && "Invalid element size ratio");
12857   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
12858                                DAG.getConstant(0, DL, SourceType);
12859 
12860   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
12861   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
12862 
12863   // Populate the new build_vector
12864   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
12865     SDValue Cast = N->getOperand(i);
12866     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
12867             Cast.getOpcode() == ISD::ZERO_EXTEND ||
12868             Cast.isUndef()) && "Invalid cast opcode");
12869     SDValue In;
12870     if (Cast.isUndef())
12871       In = DAG.getUNDEF(SourceType);
12872     else
12873       In = Cast->getOperand(0);
12874     unsigned Index = isLE ? (i * ElemRatio) :
12875                             (i * ElemRatio + (ElemRatio - 1));
12876 
12877     assert(Index < Ops.size() && "Invalid index");
12878     Ops[Index] = In;
12879   }
12880 
12881   // The type of the new BUILD_VECTOR node.
12882   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
12883   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
12884          "Invalid vector size");
12885   // Check if the new vector type is legal.
12886   if (!isTypeLegal(VecVT)) return SDValue();
12887 
12888   // Make the new BUILD_VECTOR.
12889   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
12890 
12891   // The new BUILD_VECTOR node has the potential to be further optimized.
12892   AddToWorklist(BV.getNode());
12893   // Bitcast to the desired type.
12894   return DAG.getBitcast(VT, BV);
12895 }
12896 
12897 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
12898   EVT VT = N->getValueType(0);
12899 
12900   unsigned NumInScalars = N->getNumOperands();
12901   SDLoc DL(N);
12902 
12903   EVT SrcVT = MVT::Other;
12904   unsigned Opcode = ISD::DELETED_NODE;
12905   unsigned NumDefs = 0;
12906 
12907   for (unsigned i = 0; i != NumInScalars; ++i) {
12908     SDValue In = N->getOperand(i);
12909     unsigned Opc = In.getOpcode();
12910 
12911     if (Opc == ISD::UNDEF)
12912       continue;
12913 
12914     // If all scalar values are floats and converted from integers.
12915     if (Opcode == ISD::DELETED_NODE &&
12916         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
12917       Opcode = Opc;
12918     }
12919 
12920     if (Opc != Opcode)
12921       return SDValue();
12922 
12923     EVT InVT = In.getOperand(0).getValueType();
12924 
12925     // If all scalar values are typed differently, bail out. It's chosen to
12926     // simplify BUILD_VECTOR of integer types.
12927     if (SrcVT == MVT::Other)
12928       SrcVT = InVT;
12929     if (SrcVT != InVT)
12930       return SDValue();
12931     NumDefs++;
12932   }
12933 
12934   // If the vector has just one element defined, it's not worth to fold it into
12935   // a vectorized one.
12936   if (NumDefs < 2)
12937     return SDValue();
12938 
12939   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
12940          && "Should only handle conversion from integer to float.");
12941   assert(SrcVT != MVT::Other && "Cannot determine source type!");
12942 
12943   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
12944 
12945   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
12946     return SDValue();
12947 
12948   // Just because the floating-point vector type is legal does not necessarily
12949   // mean that the corresponding integer vector type is.
12950   if (!isTypeLegal(NVT))
12951     return SDValue();
12952 
12953   SmallVector<SDValue, 8> Opnds;
12954   for (unsigned i = 0; i != NumInScalars; ++i) {
12955     SDValue In = N->getOperand(i);
12956 
12957     if (In.isUndef())
12958       Opnds.push_back(DAG.getUNDEF(SrcVT));
12959     else
12960       Opnds.push_back(In.getOperand(0));
12961   }
12962   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
12963   AddToWorklist(BV.getNode());
12964 
12965   return DAG.getNode(Opcode, DL, VT, BV);
12966 }
12967 
12968 SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
12969                                            ArrayRef<int> VectorMask,
12970                                            SDValue VecIn1, SDValue VecIn2,
12971                                            unsigned LeftIdx) {
12972   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
12973   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
12974 
12975   EVT VT = N->getValueType(0);
12976   EVT InVT1 = VecIn1.getValueType();
12977   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
12978 
12979   unsigned Vec2Offset = InVT1.getVectorNumElements();
12980   unsigned NumElems = VT.getVectorNumElements();
12981   unsigned ShuffleNumElems = NumElems;
12982 
12983   // We can't generate a shuffle node with mismatched input and output types.
12984   // Try to make the types match the type of the output.
12985   if (InVT1 != VT || InVT2 != VT) {
12986     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
12987       // If the output vector length is a multiple of both input lengths,
12988       // we can concatenate them and pad the rest with undefs.
12989       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
12990       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
12991       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
12992       ConcatOps[0] = VecIn1;
12993       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
12994       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
12995       VecIn2 = SDValue();
12996     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
12997       if (!TLI.isExtractSubvectorCheap(VT, NumElems))
12998         return SDValue();
12999 
13000       if (!VecIn2.getNode()) {
13001         // If we only have one input vector, and it's twice the size of the
13002         // output, split it in two.
13003         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
13004                              DAG.getConstant(NumElems, DL, IdxTy));
13005         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
13006         // Since we now have shorter input vectors, adjust the offset of the
13007         // second vector's start.
13008         Vec2Offset = NumElems;
13009       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
13010         // VecIn1 is wider than the output, and we have another, possibly
13011         // smaller input. Pad the smaller input with undefs, shuffle at the
13012         // input vector width, and extract the output.
13013         // The shuffle type is different than VT, so check legality again.
13014         if (LegalOperations &&
13015             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
13016           return SDValue();
13017 
13018         if (InVT1 != InVT2)
13019           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
13020                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
13021         ShuffleNumElems = NumElems * 2;
13022       } else {
13023         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
13024         // than VecIn1. We can't handle this for now - this case will disappear
13025         // when we start sorting the vectors by type.
13026         return SDValue();
13027       }
13028     } else {
13029       // TODO: Support cases where the length mismatch isn't exactly by a
13030       // factor of 2.
13031       // TODO: Move this check upwards, so that if we have bad type
13032       // mismatches, we don't create any DAG nodes.
13033       return SDValue();
13034     }
13035   }
13036 
13037   // Initialize mask to undef.
13038   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
13039 
13040   // Only need to run up to the number of elements actually used, not the
13041   // total number of elements in the shuffle - if we are shuffling a wider
13042   // vector, the high lanes should be set to undef.
13043   for (unsigned i = 0; i != NumElems; ++i) {
13044     if (VectorMask[i] <= 0)
13045       continue;
13046 
13047     SDValue Extract = N->getOperand(i);
13048     unsigned ExtIndex =
13049         cast<ConstantSDNode>(Extract.getOperand(1))->getZExtValue();
13050 
13051     if (VectorMask[i] == (int)LeftIdx) {
13052       Mask[i] = ExtIndex;
13053     } else if (VectorMask[i] == (int)LeftIdx + 1) {
13054       Mask[i] = Vec2Offset + ExtIndex;
13055     }
13056   }
13057 
13058   // The type the input vectors may have changed above.
13059   InVT1 = VecIn1.getValueType();
13060 
13061   // If we already have a VecIn2, it should have the same type as VecIn1.
13062   // If we don't, get an undef/zero vector of the appropriate type.
13063   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
13064   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
13065 
13066   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
13067   if (ShuffleNumElems > NumElems)
13068     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
13069 
13070   return Shuffle;
13071 }
13072 
13073 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
13074 // operations. If the types of the vectors we're extracting from allow it,
13075 // turn this into a vector_shuffle node.
13076 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
13077   SDLoc DL(N);
13078   EVT VT = N->getValueType(0);
13079 
13080   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
13081   if (!isTypeLegal(VT))
13082     return SDValue();
13083 
13084   // May only combine to shuffle after legalize if shuffle is legal.
13085   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
13086     return SDValue();
13087 
13088   bool UsesZeroVector = false;
13089   unsigned NumElems = N->getNumOperands();
13090 
13091   // Record, for each element of the newly built vector, which input vector
13092   // that element comes from. -1 stands for undef, 0 for the zero vector,
13093   // and positive values for the input vectors.
13094   // VectorMask maps each element to its vector number, and VecIn maps vector
13095   // numbers to their initial SDValues.
13096 
13097   SmallVector<int, 8> VectorMask(NumElems, -1);
13098   SmallVector<SDValue, 8> VecIn;
13099   VecIn.push_back(SDValue());
13100 
13101   for (unsigned i = 0; i != NumElems; ++i) {
13102     SDValue Op = N->getOperand(i);
13103 
13104     if (Op.isUndef())
13105       continue;
13106 
13107     // See if we can use a blend with a zero vector.
13108     // TODO: Should we generalize this to a blend with an arbitrary constant
13109     // vector?
13110     if (isNullConstant(Op) || isNullFPConstant(Op)) {
13111       UsesZeroVector = true;
13112       VectorMask[i] = 0;
13113       continue;
13114     }
13115 
13116     // Not an undef or zero. If the input is something other than an
13117     // EXTRACT_VECTOR_ELT with a constant index, bail out.
13118     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13119         !isa<ConstantSDNode>(Op.getOperand(1)))
13120       return SDValue();
13121 
13122     SDValue ExtractedFromVec = Op.getOperand(0);
13123 
13124     // All inputs must have the same element type as the output.
13125     if (VT.getVectorElementType() !=
13126         ExtractedFromVec.getValueType().getVectorElementType())
13127       return SDValue();
13128 
13129     // Have we seen this input vector before?
13130     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
13131     // a map back from SDValues to numbers isn't worth it.
13132     unsigned Idx = std::distance(
13133         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
13134     if (Idx == VecIn.size())
13135       VecIn.push_back(ExtractedFromVec);
13136 
13137     VectorMask[i] = Idx;
13138   }
13139 
13140   // If we didn't find at least one input vector, bail out.
13141   if (VecIn.size() < 2)
13142     return SDValue();
13143 
13144   // TODO: We want to sort the vectors by descending length, so that adjacent
13145   // pairs have similar length, and the longer vector is always first in the
13146   // pair.
13147 
13148   // TODO: Should this fire if some of the input vectors has illegal type (like
13149   // it does now), or should we let legalization run its course first?
13150 
13151   // Shuffle phase:
13152   // Take pairs of vectors, and shuffle them so that the result has elements
13153   // from these vectors in the correct places.
13154   // For example, given:
13155   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
13156   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
13157   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
13158   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
13159   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
13160   // We will generate:
13161   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
13162   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
13163   SmallVector<SDValue, 4> Shuffles;
13164   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
13165     unsigned LeftIdx = 2 * In + 1;
13166     SDValue VecLeft = VecIn[LeftIdx];
13167     SDValue VecRight =
13168         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
13169 
13170     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
13171                                                 VecRight, LeftIdx))
13172       Shuffles.push_back(Shuffle);
13173     else
13174       return SDValue();
13175   }
13176 
13177   // If we need the zero vector as an "ingredient" in the blend tree, add it
13178   // to the list of shuffles.
13179   if (UsesZeroVector)
13180     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
13181                                       : DAG.getConstantFP(0.0, DL, VT));
13182 
13183   // If we only have one shuffle, we're done.
13184   if (Shuffles.size() == 1)
13185     return Shuffles[0];
13186 
13187   // Update the vector mask to point to the post-shuffle vectors.
13188   for (int &Vec : VectorMask)
13189     if (Vec == 0)
13190       Vec = Shuffles.size() - 1;
13191     else
13192       Vec = (Vec - 1) / 2;
13193 
13194   // More than one shuffle. Generate a binary tree of blends, e.g. if from
13195   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
13196   // generate:
13197   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
13198   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
13199   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
13200   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
13201   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
13202   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
13203   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
13204 
13205   // Make sure the initial size of the shuffle list is even.
13206   if (Shuffles.size() % 2)
13207     Shuffles.push_back(DAG.getUNDEF(VT));
13208 
13209   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
13210     if (CurSize % 2) {
13211       Shuffles[CurSize] = DAG.getUNDEF(VT);
13212       CurSize++;
13213     }
13214     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
13215       int Left = 2 * In;
13216       int Right = 2 * In + 1;
13217       SmallVector<int, 8> Mask(NumElems, -1);
13218       for (unsigned i = 0; i != NumElems; ++i) {
13219         if (VectorMask[i] == Left) {
13220           Mask[i] = i;
13221           VectorMask[i] = In;
13222         } else if (VectorMask[i] == Right) {
13223           Mask[i] = i + NumElems;
13224           VectorMask[i] = In;
13225         }
13226       }
13227 
13228       Shuffles[In] =
13229           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
13230     }
13231   }
13232 
13233   return Shuffles[0];
13234 }
13235 
13236 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
13237   EVT VT = N->getValueType(0);
13238 
13239   // A vector built entirely of undefs is undef.
13240   if (ISD::allOperandsUndef(N))
13241     return DAG.getUNDEF(VT);
13242 
13243   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
13244     return V;
13245 
13246   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
13247     return V;
13248 
13249   if (SDValue V = reduceBuildVecToShuffle(N))
13250     return V;
13251 
13252   return SDValue();
13253 }
13254 
13255 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
13256   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13257   EVT OpVT = N->getOperand(0).getValueType();
13258 
13259   // If the operands are legal vectors, leave them alone.
13260   if (TLI.isTypeLegal(OpVT))
13261     return SDValue();
13262 
13263   SDLoc DL(N);
13264   EVT VT = N->getValueType(0);
13265   SmallVector<SDValue, 8> Ops;
13266 
13267   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
13268   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13269 
13270   // Keep track of what we encounter.
13271   bool AnyInteger = false;
13272   bool AnyFP = false;
13273   for (const SDValue &Op : N->ops()) {
13274     if (ISD::BITCAST == Op.getOpcode() &&
13275         !Op.getOperand(0).getValueType().isVector())
13276       Ops.push_back(Op.getOperand(0));
13277     else if (ISD::UNDEF == Op.getOpcode())
13278       Ops.push_back(ScalarUndef);
13279     else
13280       return SDValue();
13281 
13282     // Note whether we encounter an integer or floating point scalar.
13283     // If it's neither, bail out, it could be something weird like x86mmx.
13284     EVT LastOpVT = Ops.back().getValueType();
13285     if (LastOpVT.isFloatingPoint())
13286       AnyFP = true;
13287     else if (LastOpVT.isInteger())
13288       AnyInteger = true;
13289     else
13290       return SDValue();
13291   }
13292 
13293   // If any of the operands is a floating point scalar bitcast to a vector,
13294   // use floating point types throughout, and bitcast everything.
13295   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
13296   if (AnyFP) {
13297     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
13298     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13299     if (AnyInteger) {
13300       for (SDValue &Op : Ops) {
13301         if (Op.getValueType() == SVT)
13302           continue;
13303         if (Op.isUndef())
13304           Op = ScalarUndef;
13305         else
13306           Op = DAG.getBitcast(SVT, Op);
13307       }
13308     }
13309   }
13310 
13311   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
13312                                VT.getSizeInBits() / SVT.getSizeInBits());
13313   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
13314 }
13315 
13316 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
13317 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
13318 // most two distinct vectors the same size as the result, attempt to turn this
13319 // into a legal shuffle.
13320 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
13321   EVT VT = N->getValueType(0);
13322   EVT OpVT = N->getOperand(0).getValueType();
13323   int NumElts = VT.getVectorNumElements();
13324   int NumOpElts = OpVT.getVectorNumElements();
13325 
13326   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
13327   SmallVector<int, 8> Mask;
13328 
13329   for (SDValue Op : N->ops()) {
13330     // Peek through any bitcast.
13331     while (Op.getOpcode() == ISD::BITCAST)
13332       Op = Op.getOperand(0);
13333 
13334     // UNDEF nodes convert to UNDEF shuffle mask values.
13335     if (Op.isUndef()) {
13336       Mask.append((unsigned)NumOpElts, -1);
13337       continue;
13338     }
13339 
13340     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13341       return SDValue();
13342 
13343     // What vector are we extracting the subvector from and at what index?
13344     SDValue ExtVec = Op.getOperand(0);
13345 
13346     // We want the EVT of the original extraction to correctly scale the
13347     // extraction index.
13348     EVT ExtVT = ExtVec.getValueType();
13349 
13350     // Peek through any bitcast.
13351     while (ExtVec.getOpcode() == ISD::BITCAST)
13352       ExtVec = ExtVec.getOperand(0);
13353 
13354     // UNDEF nodes convert to UNDEF shuffle mask values.
13355     if (ExtVec.isUndef()) {
13356       Mask.append((unsigned)NumOpElts, -1);
13357       continue;
13358     }
13359 
13360     if (!isa<ConstantSDNode>(Op.getOperand(1)))
13361       return SDValue();
13362     int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
13363 
13364     // Ensure that we are extracting a subvector from a vector the same
13365     // size as the result.
13366     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
13367       return SDValue();
13368 
13369     // Scale the subvector index to account for any bitcast.
13370     int NumExtElts = ExtVT.getVectorNumElements();
13371     if (0 == (NumExtElts % NumElts))
13372       ExtIdx /= (NumExtElts / NumElts);
13373     else if (0 == (NumElts % NumExtElts))
13374       ExtIdx *= (NumElts / NumExtElts);
13375     else
13376       return SDValue();
13377 
13378     // At most we can reference 2 inputs in the final shuffle.
13379     if (SV0.isUndef() || SV0 == ExtVec) {
13380       SV0 = ExtVec;
13381       for (int i = 0; i != NumOpElts; ++i)
13382         Mask.push_back(i + ExtIdx);
13383     } else if (SV1.isUndef() || SV1 == ExtVec) {
13384       SV1 = ExtVec;
13385       for (int i = 0; i != NumOpElts; ++i)
13386         Mask.push_back(i + ExtIdx + NumElts);
13387     } else {
13388       return SDValue();
13389     }
13390   }
13391 
13392   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
13393     return SDValue();
13394 
13395   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
13396                               DAG.getBitcast(VT, SV1), Mask);
13397 }
13398 
13399 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
13400   // If we only have one input vector, we don't need to do any concatenation.
13401   if (N->getNumOperands() == 1)
13402     return N->getOperand(0);
13403 
13404   // Check if all of the operands are undefs.
13405   EVT VT = N->getValueType(0);
13406   if (ISD::allOperandsUndef(N))
13407     return DAG.getUNDEF(VT);
13408 
13409   // Optimize concat_vectors where all but the first of the vectors are undef.
13410   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
13411         return Op.isUndef();
13412       })) {
13413     SDValue In = N->getOperand(0);
13414     assert(In.getValueType().isVector() && "Must concat vectors");
13415 
13416     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
13417     if (In->getOpcode() == ISD::BITCAST &&
13418         !In->getOperand(0)->getValueType(0).isVector()) {
13419       SDValue Scalar = In->getOperand(0);
13420 
13421       // If the bitcast type isn't legal, it might be a trunc of a legal type;
13422       // look through the trunc so we can still do the transform:
13423       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
13424       if (Scalar->getOpcode() == ISD::TRUNCATE &&
13425           !TLI.isTypeLegal(Scalar.getValueType()) &&
13426           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
13427         Scalar = Scalar->getOperand(0);
13428 
13429       EVT SclTy = Scalar->getValueType(0);
13430 
13431       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
13432         return SDValue();
13433 
13434       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
13435                                  VT.getSizeInBits() / SclTy.getSizeInBits());
13436       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
13437         return SDValue();
13438 
13439       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
13440       return DAG.getBitcast(VT, Res);
13441     }
13442   }
13443 
13444   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
13445   // We have already tested above for an UNDEF only concatenation.
13446   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
13447   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
13448   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
13449     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
13450   };
13451   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
13452     SmallVector<SDValue, 8> Opnds;
13453     EVT SVT = VT.getScalarType();
13454 
13455     EVT MinVT = SVT;
13456     if (!SVT.isFloatingPoint()) {
13457       // If BUILD_VECTOR are from built from integer, they may have different
13458       // operand types. Get the smallest type and truncate all operands to it.
13459       bool FoundMinVT = false;
13460       for (const SDValue &Op : N->ops())
13461         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
13462           EVT OpSVT = Op.getOperand(0)->getValueType(0);
13463           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
13464           FoundMinVT = true;
13465         }
13466       assert(FoundMinVT && "Concat vector type mismatch");
13467     }
13468 
13469     for (const SDValue &Op : N->ops()) {
13470       EVT OpVT = Op.getValueType();
13471       unsigned NumElts = OpVT.getVectorNumElements();
13472 
13473       if (ISD::UNDEF == Op.getOpcode())
13474         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
13475 
13476       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
13477         if (SVT.isFloatingPoint()) {
13478           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
13479           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
13480         } else {
13481           for (unsigned i = 0; i != NumElts; ++i)
13482             Opnds.push_back(
13483                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
13484         }
13485       }
13486     }
13487 
13488     assert(VT.getVectorNumElements() == Opnds.size() &&
13489            "Concat vector type mismatch");
13490     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
13491   }
13492 
13493   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
13494   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
13495     return V;
13496 
13497   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
13498   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
13499     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
13500       return V;
13501 
13502   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
13503   // nodes often generate nop CONCAT_VECTOR nodes.
13504   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
13505   // place the incoming vectors at the exact same location.
13506   SDValue SingleSource = SDValue();
13507   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
13508 
13509   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13510     SDValue Op = N->getOperand(i);
13511 
13512     if (Op.isUndef())
13513       continue;
13514 
13515     // Check if this is the identity extract:
13516     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13517       return SDValue();
13518 
13519     // Find the single incoming vector for the extract_subvector.
13520     if (SingleSource.getNode()) {
13521       if (Op.getOperand(0) != SingleSource)
13522         return SDValue();
13523     } else {
13524       SingleSource = Op.getOperand(0);
13525 
13526       // Check the source type is the same as the type of the result.
13527       // If not, this concat may extend the vector, so we can not
13528       // optimize it away.
13529       if (SingleSource.getValueType() != N->getValueType(0))
13530         return SDValue();
13531     }
13532 
13533     unsigned IdentityIndex = i * PartNumElem;
13534     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13535     // The extract index must be constant.
13536     if (!CS)
13537       return SDValue();
13538 
13539     // Check that we are reading from the identity index.
13540     if (CS->getZExtValue() != IdentityIndex)
13541       return SDValue();
13542   }
13543 
13544   if (SingleSource.getNode())
13545     return SingleSource;
13546 
13547   return SDValue();
13548 }
13549 
13550 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
13551   EVT NVT = N->getValueType(0);
13552   SDValue V = N->getOperand(0);
13553 
13554   if (V->getOpcode() == ISD::CONCAT_VECTORS) {
13555     // Combine:
13556     //    (extract_subvec (concat V1, V2, ...), i)
13557     // Into:
13558     //    Vi if possible
13559     // Only operand 0 is checked as 'concat' assumes all inputs of the same
13560     // type.
13561     if (V->getOperand(0).getValueType() != NVT)
13562       return SDValue();
13563     unsigned Idx = N->getConstantOperandVal(1);
13564     unsigned NumElems = NVT.getVectorNumElements();
13565     assert((Idx % NumElems) == 0 &&
13566            "IDX in concat is not a multiple of the result vector length.");
13567     return V->getOperand(Idx / NumElems);
13568   }
13569 
13570   // Skip bitcasting
13571   if (V->getOpcode() == ISD::BITCAST)
13572     V = V.getOperand(0);
13573 
13574   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
13575     // Handle only simple case where vector being inserted and vector
13576     // being extracted are of same type, and are half size of larger vectors.
13577     EVT BigVT = V->getOperand(0).getValueType();
13578     EVT SmallVT = V->getOperand(1).getValueType();
13579     if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
13580       return SDValue();
13581 
13582     // Only handle cases where both indexes are constants with the same type.
13583     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
13584     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
13585 
13586     if (InsIdx && ExtIdx &&
13587         InsIdx->getValueType(0).getSizeInBits() <= 64 &&
13588         ExtIdx->getValueType(0).getSizeInBits() <= 64) {
13589       // Combine:
13590       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
13591       // Into:
13592       //    indices are equal or bit offsets are equal => V1
13593       //    otherwise => (extract_subvec V1, ExtIdx)
13594       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
13595           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
13596         return DAG.getBitcast(NVT, V->getOperand(1));
13597       return DAG.getNode(
13598           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
13599           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
13600           N->getOperand(1));
13601     }
13602   }
13603 
13604   return SDValue();
13605 }
13606 
13607 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
13608                                                  SDValue V, SelectionDAG &DAG) {
13609   SDLoc DL(V);
13610   EVT VT = V.getValueType();
13611 
13612   switch (V.getOpcode()) {
13613   default:
13614     return V;
13615 
13616   case ISD::CONCAT_VECTORS: {
13617     EVT OpVT = V->getOperand(0).getValueType();
13618     int OpSize = OpVT.getVectorNumElements();
13619     SmallBitVector OpUsedElements(OpSize, false);
13620     bool FoundSimplification = false;
13621     SmallVector<SDValue, 4> NewOps;
13622     NewOps.reserve(V->getNumOperands());
13623     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
13624       SDValue Op = V->getOperand(i);
13625       bool OpUsed = false;
13626       for (int j = 0; j < OpSize; ++j)
13627         if (UsedElements[i * OpSize + j]) {
13628           OpUsedElements[j] = true;
13629           OpUsed = true;
13630         }
13631       NewOps.push_back(
13632           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
13633                  : DAG.getUNDEF(OpVT));
13634       FoundSimplification |= Op == NewOps.back();
13635       OpUsedElements.reset();
13636     }
13637     if (FoundSimplification)
13638       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
13639     return V;
13640   }
13641 
13642   case ISD::INSERT_SUBVECTOR: {
13643     SDValue BaseV = V->getOperand(0);
13644     SDValue SubV = V->getOperand(1);
13645     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
13646     if (!IdxN)
13647       return V;
13648 
13649     int SubSize = SubV.getValueType().getVectorNumElements();
13650     int Idx = IdxN->getZExtValue();
13651     bool SubVectorUsed = false;
13652     SmallBitVector SubUsedElements(SubSize, false);
13653     for (int i = 0; i < SubSize; ++i)
13654       if (UsedElements[i + Idx]) {
13655         SubVectorUsed = true;
13656         SubUsedElements[i] = true;
13657         UsedElements[i + Idx] = false;
13658       }
13659 
13660     // Now recurse on both the base and sub vectors.
13661     SDValue SimplifiedSubV =
13662         SubVectorUsed
13663             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
13664             : DAG.getUNDEF(SubV.getValueType());
13665     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
13666     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
13667       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
13668                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
13669     return V;
13670   }
13671   }
13672 }
13673 
13674 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
13675                                        SDValue N1, SelectionDAG &DAG) {
13676   EVT VT = SVN->getValueType(0);
13677   int NumElts = VT.getVectorNumElements();
13678   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
13679   for (int M : SVN->getMask())
13680     if (M >= 0 && M < NumElts)
13681       N0UsedElements[M] = true;
13682     else if (M >= NumElts)
13683       N1UsedElements[M - NumElts] = true;
13684 
13685   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
13686   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
13687   if (S0 == N0 && S1 == N1)
13688     return SDValue();
13689 
13690   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
13691 }
13692 
13693 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
13694 // or turn a shuffle of a single concat into simpler shuffle then concat.
13695 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
13696   EVT VT = N->getValueType(0);
13697   unsigned NumElts = VT.getVectorNumElements();
13698 
13699   SDValue N0 = N->getOperand(0);
13700   SDValue N1 = N->getOperand(1);
13701   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13702 
13703   SmallVector<SDValue, 4> Ops;
13704   EVT ConcatVT = N0.getOperand(0).getValueType();
13705   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
13706   unsigned NumConcats = NumElts / NumElemsPerConcat;
13707 
13708   // Special case: shuffle(concat(A,B)) can be more efficiently represented
13709   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
13710   // half vector elements.
13711   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
13712       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
13713                   SVN->getMask().end(), [](int i) { return i == -1; })) {
13714     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
13715                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
13716     N1 = DAG.getUNDEF(ConcatVT);
13717     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
13718   }
13719 
13720   // Look at every vector that's inserted. We're looking for exact
13721   // subvector-sized copies from a concatenated vector
13722   for (unsigned I = 0; I != NumConcats; ++I) {
13723     // Make sure we're dealing with a copy.
13724     unsigned Begin = I * NumElemsPerConcat;
13725     bool AllUndef = true, NoUndef = true;
13726     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
13727       if (SVN->getMaskElt(J) >= 0)
13728         AllUndef = false;
13729       else
13730         NoUndef = false;
13731     }
13732 
13733     if (NoUndef) {
13734       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
13735         return SDValue();
13736 
13737       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
13738         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
13739           return SDValue();
13740 
13741       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
13742       if (FirstElt < N0.getNumOperands())
13743         Ops.push_back(N0.getOperand(FirstElt));
13744       else
13745         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
13746 
13747     } else if (AllUndef) {
13748       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
13749     } else { // Mixed with general masks and undefs, can't do optimization.
13750       return SDValue();
13751     }
13752   }
13753 
13754   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
13755 }
13756 
13757 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
13758   EVT VT = N->getValueType(0);
13759   unsigned NumElts = VT.getVectorNumElements();
13760 
13761   SDValue N0 = N->getOperand(0);
13762   SDValue N1 = N->getOperand(1);
13763 
13764   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
13765 
13766   // Canonicalize shuffle undef, undef -> undef
13767   if (N0.isUndef() && N1.isUndef())
13768     return DAG.getUNDEF(VT);
13769 
13770   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13771 
13772   // Canonicalize shuffle v, v -> v, undef
13773   if (N0 == N1) {
13774     SmallVector<int, 8> NewMask;
13775     for (unsigned i = 0; i != NumElts; ++i) {
13776       int Idx = SVN->getMaskElt(i);
13777       if (Idx >= (int)NumElts) Idx -= NumElts;
13778       NewMask.push_back(Idx);
13779     }
13780     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
13781   }
13782 
13783   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
13784   if (N0.isUndef())
13785     return DAG.getCommutedVectorShuffle(*SVN);
13786 
13787   // Remove references to rhs if it is undef
13788   if (N1.isUndef()) {
13789     bool Changed = false;
13790     SmallVector<int, 8> NewMask;
13791     for (unsigned i = 0; i != NumElts; ++i) {
13792       int Idx = SVN->getMaskElt(i);
13793       if (Idx >= (int)NumElts) {
13794         Idx = -1;
13795         Changed = true;
13796       }
13797       NewMask.push_back(Idx);
13798     }
13799     if (Changed)
13800       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
13801   }
13802 
13803   // If it is a splat, check if the argument vector is another splat or a
13804   // build_vector.
13805   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
13806     SDNode *V = N0.getNode();
13807 
13808     // If this is a bit convert that changes the element type of the vector but
13809     // not the number of vector elements, look through it.  Be careful not to
13810     // look though conversions that change things like v4f32 to v2f64.
13811     if (V->getOpcode() == ISD::BITCAST) {
13812       SDValue ConvInput = V->getOperand(0);
13813       if (ConvInput.getValueType().isVector() &&
13814           ConvInput.getValueType().getVectorNumElements() == NumElts)
13815         V = ConvInput.getNode();
13816     }
13817 
13818     if (V->getOpcode() == ISD::BUILD_VECTOR) {
13819       assert(V->getNumOperands() == NumElts &&
13820              "BUILD_VECTOR has wrong number of operands");
13821       SDValue Base;
13822       bool AllSame = true;
13823       for (unsigned i = 0; i != NumElts; ++i) {
13824         if (!V->getOperand(i).isUndef()) {
13825           Base = V->getOperand(i);
13826           break;
13827         }
13828       }
13829       // Splat of <u, u, u, u>, return <u, u, u, u>
13830       if (!Base.getNode())
13831         return N0;
13832       for (unsigned i = 0; i != NumElts; ++i) {
13833         if (V->getOperand(i) != Base) {
13834           AllSame = false;
13835           break;
13836         }
13837       }
13838       // Splat of <x, x, x, x>, return <x, x, x, x>
13839       if (AllSame)
13840         return N0;
13841 
13842       // Canonicalize any other splat as a build_vector.
13843       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
13844       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
13845       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
13846 
13847       // We may have jumped through bitcasts, so the type of the
13848       // BUILD_VECTOR may not match the type of the shuffle.
13849       if (V->getValueType(0) != VT)
13850         NewBV = DAG.getBitcast(VT, NewBV);
13851       return NewBV;
13852     }
13853   }
13854 
13855   // There are various patterns used to build up a vector from smaller vectors,
13856   // subvectors, or elements. Scan chains of these and replace unused insertions
13857   // or components with undef.
13858   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
13859     return S;
13860 
13861   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
13862       Level < AfterLegalizeVectorOps &&
13863       (N1.isUndef() ||
13864       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13865        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
13866     if (SDValue V = partitionShuffleOfConcats(N, DAG))
13867       return V;
13868   }
13869 
13870   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
13871   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
13872   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
13873     SmallVector<SDValue, 8> Ops;
13874     for (int M : SVN->getMask()) {
13875       SDValue Op = DAG.getUNDEF(VT.getScalarType());
13876       if (M >= 0) {
13877         int Idx = M % NumElts;
13878         SDValue &S = (M < (int)NumElts ? N0 : N1);
13879         if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
13880           Op = S.getOperand(Idx);
13881         } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
13882           if (Idx == 0)
13883             Op = S.getOperand(0);
13884         } else {
13885           // Operand can't be combined - bail out.
13886           break;
13887         }
13888       }
13889       Ops.push_back(Op);
13890     }
13891     if (Ops.size() == VT.getVectorNumElements()) {
13892       // BUILD_VECTOR requires all inputs to be of the same type, find the
13893       // maximum type and extend them all.
13894       EVT SVT = VT.getScalarType();
13895       if (SVT.isInteger())
13896         for (SDValue &Op : Ops)
13897           SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
13898       if (SVT != VT.getScalarType())
13899         for (SDValue &Op : Ops)
13900           Op = TLI.isZExtFree(Op.getValueType(), SVT)
13901                    ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
13902                    : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
13903       return DAG.getBuildVector(VT, SDLoc(N), Ops);
13904     }
13905   }
13906 
13907   // If this shuffle only has a single input that is a bitcasted shuffle,
13908   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
13909   // back to their original types.
13910   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
13911       N1.isUndef() && Level < AfterLegalizeVectorOps &&
13912       TLI.isTypeLegal(VT)) {
13913 
13914     // Peek through the bitcast only if there is one user.
13915     SDValue BC0 = N0;
13916     while (BC0.getOpcode() == ISD::BITCAST) {
13917       if (!BC0.hasOneUse())
13918         break;
13919       BC0 = BC0.getOperand(0);
13920     }
13921 
13922     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
13923       if (Scale == 1)
13924         return SmallVector<int, 8>(Mask.begin(), Mask.end());
13925 
13926       SmallVector<int, 8> NewMask;
13927       for (int M : Mask)
13928         for (int s = 0; s != Scale; ++s)
13929           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
13930       return NewMask;
13931     };
13932 
13933     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
13934       EVT SVT = VT.getScalarType();
13935       EVT InnerVT = BC0->getValueType(0);
13936       EVT InnerSVT = InnerVT.getScalarType();
13937 
13938       // Determine which shuffle works with the smaller scalar type.
13939       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
13940       EVT ScaleSVT = ScaleVT.getScalarType();
13941 
13942       if (TLI.isTypeLegal(ScaleVT) &&
13943           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
13944           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
13945 
13946         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
13947         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
13948 
13949         // Scale the shuffle masks to the smaller scalar type.
13950         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
13951         SmallVector<int, 8> InnerMask =
13952             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
13953         SmallVector<int, 8> OuterMask =
13954             ScaleShuffleMask(SVN->getMask(), OuterScale);
13955 
13956         // Merge the shuffle masks.
13957         SmallVector<int, 8> NewMask;
13958         for (int M : OuterMask)
13959           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
13960 
13961         // Test for shuffle mask legality over both commutations.
13962         SDValue SV0 = BC0->getOperand(0);
13963         SDValue SV1 = BC0->getOperand(1);
13964         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
13965         if (!LegalMask) {
13966           std::swap(SV0, SV1);
13967           ShuffleVectorSDNode::commuteMask(NewMask);
13968           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
13969         }
13970 
13971         if (LegalMask) {
13972           SV0 = DAG.getBitcast(ScaleVT, SV0);
13973           SV1 = DAG.getBitcast(ScaleVT, SV1);
13974           return DAG.getBitcast(
13975               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
13976         }
13977       }
13978     }
13979   }
13980 
13981   // Canonicalize shuffles according to rules:
13982   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
13983   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
13984   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
13985   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
13986       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
13987       TLI.isTypeLegal(VT)) {
13988     // The incoming shuffle must be of the same type as the result of the
13989     // current shuffle.
13990     assert(N1->getOperand(0).getValueType() == VT &&
13991            "Shuffle types don't match");
13992 
13993     SDValue SV0 = N1->getOperand(0);
13994     SDValue SV1 = N1->getOperand(1);
13995     bool HasSameOp0 = N0 == SV0;
13996     bool IsSV1Undef = SV1.isUndef();
13997     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
13998       // Commute the operands of this shuffle so that next rule
13999       // will trigger.
14000       return DAG.getCommutedVectorShuffle(*SVN);
14001   }
14002 
14003   // Try to fold according to rules:
14004   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
14005   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
14006   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
14007   // Don't try to fold shuffles with illegal type.
14008   // Only fold if this shuffle is the only user of the other shuffle.
14009   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
14010       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
14011     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
14012 
14013     // The incoming shuffle must be of the same type as the result of the
14014     // current shuffle.
14015     assert(OtherSV->getOperand(0).getValueType() == VT &&
14016            "Shuffle types don't match");
14017 
14018     SDValue SV0, SV1;
14019     SmallVector<int, 4> Mask;
14020     // Compute the combined shuffle mask for a shuffle with SV0 as the first
14021     // operand, and SV1 as the second operand.
14022     for (unsigned i = 0; i != NumElts; ++i) {
14023       int Idx = SVN->getMaskElt(i);
14024       if (Idx < 0) {
14025         // Propagate Undef.
14026         Mask.push_back(Idx);
14027         continue;
14028       }
14029 
14030       SDValue CurrentVec;
14031       if (Idx < (int)NumElts) {
14032         // This shuffle index refers to the inner shuffle N0. Lookup the inner
14033         // shuffle mask to identify which vector is actually referenced.
14034         Idx = OtherSV->getMaskElt(Idx);
14035         if (Idx < 0) {
14036           // Propagate Undef.
14037           Mask.push_back(Idx);
14038           continue;
14039         }
14040 
14041         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
14042                                            : OtherSV->getOperand(1);
14043       } else {
14044         // This shuffle index references an element within N1.
14045         CurrentVec = N1;
14046       }
14047 
14048       // Simple case where 'CurrentVec' is UNDEF.
14049       if (CurrentVec.isUndef()) {
14050         Mask.push_back(-1);
14051         continue;
14052       }
14053 
14054       // Canonicalize the shuffle index. We don't know yet if CurrentVec
14055       // will be the first or second operand of the combined shuffle.
14056       Idx = Idx % NumElts;
14057       if (!SV0.getNode() || SV0 == CurrentVec) {
14058         // Ok. CurrentVec is the left hand side.
14059         // Update the mask accordingly.
14060         SV0 = CurrentVec;
14061         Mask.push_back(Idx);
14062         continue;
14063       }
14064 
14065       // Bail out if we cannot convert the shuffle pair into a single shuffle.
14066       if (SV1.getNode() && SV1 != CurrentVec)
14067         return SDValue();
14068 
14069       // Ok. CurrentVec is the right hand side.
14070       // Update the mask accordingly.
14071       SV1 = CurrentVec;
14072       Mask.push_back(Idx + NumElts);
14073     }
14074 
14075     // Check if all indices in Mask are Undef. In case, propagate Undef.
14076     bool isUndefMask = true;
14077     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
14078       isUndefMask &= Mask[i] < 0;
14079 
14080     if (isUndefMask)
14081       return DAG.getUNDEF(VT);
14082 
14083     if (!SV0.getNode())
14084       SV0 = DAG.getUNDEF(VT);
14085     if (!SV1.getNode())
14086       SV1 = DAG.getUNDEF(VT);
14087 
14088     // Avoid introducing shuffles with illegal mask.
14089     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
14090       ShuffleVectorSDNode::commuteMask(Mask);
14091 
14092       if (!TLI.isShuffleMaskLegal(Mask, VT))
14093         return SDValue();
14094 
14095       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
14096       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
14097       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
14098       std::swap(SV0, SV1);
14099     }
14100 
14101     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
14102     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
14103     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
14104     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
14105   }
14106 
14107   return SDValue();
14108 }
14109 
14110 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
14111   SDValue InVal = N->getOperand(0);
14112   EVT VT = N->getValueType(0);
14113 
14114   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
14115   // with a VECTOR_SHUFFLE.
14116   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14117     SDValue InVec = InVal->getOperand(0);
14118     SDValue EltNo = InVal->getOperand(1);
14119 
14120     // FIXME: We could support implicit truncation if the shuffle can be
14121     // scaled to a smaller vector scalar type.
14122     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
14123     if (C0 && VT == InVec.getValueType() &&
14124         VT.getScalarType() == InVal.getValueType()) {
14125       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
14126       int Elt = C0->getZExtValue();
14127       NewMask[0] = Elt;
14128 
14129       if (TLI.isShuffleMaskLegal(NewMask, VT))
14130         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
14131                                     NewMask);
14132     }
14133   }
14134 
14135   return SDValue();
14136 }
14137 
14138 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
14139   EVT VT = N->getValueType(0);
14140   SDValue N0 = N->getOperand(0);
14141   SDValue N1 = N->getOperand(1);
14142   SDValue N2 = N->getOperand(2);
14143 
14144   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
14145   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
14146   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
14147   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
14148       N0.getOperand(1).getValueType() == N1.getValueType() &&
14149       N0.getOperand(2) == N2)
14150     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
14151                        N1, N2);
14152 
14153   if (N0.getValueType() != N1.getValueType())
14154     return SDValue();
14155 
14156   // If the input vector is a concatenation, and the insert replaces
14157   // one of the halves, we can optimize into a single concat_vectors.
14158   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
14159       N2.getOpcode() == ISD::Constant) {
14160     APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
14161 
14162     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
14163     // (concat_vectors Z, Y)
14164     if (InsIdx == 0)
14165       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
14166                          N0.getOperand(1));
14167 
14168     // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
14169     // (concat_vectors X, Z)
14170     if (InsIdx == VT.getVectorNumElements() / 2)
14171       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
14172                          N1);
14173   }
14174 
14175   return SDValue();
14176 }
14177 
14178 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
14179   SDValue N0 = N->getOperand(0);
14180 
14181   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
14182   if (N0->getOpcode() == ISD::FP16_TO_FP)
14183     return N0->getOperand(0);
14184 
14185   return SDValue();
14186 }
14187 
14188 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
14189   SDValue N0 = N->getOperand(0);
14190 
14191   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
14192   if (N0->getOpcode() == ISD::AND) {
14193     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
14194     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
14195       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
14196                          N0.getOperand(0));
14197     }
14198   }
14199 
14200   return SDValue();
14201 }
14202 
14203 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
14204 /// with the destination vector and a zero vector.
14205 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
14206 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
14207 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
14208   EVT VT = N->getValueType(0);
14209   SDValue LHS = N->getOperand(0);
14210   SDValue RHS = N->getOperand(1);
14211   SDLoc DL(N);
14212 
14213   // Make sure we're not running after operation legalization where it
14214   // may have custom lowered the vector shuffles.
14215   if (LegalOperations)
14216     return SDValue();
14217 
14218   if (N->getOpcode() != ISD::AND)
14219     return SDValue();
14220 
14221   if (RHS.getOpcode() == ISD::BITCAST)
14222     RHS = RHS.getOperand(0);
14223 
14224   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
14225     return SDValue();
14226 
14227   EVT RVT = RHS.getValueType();
14228   unsigned NumElts = RHS.getNumOperands();
14229 
14230   // Attempt to create a valid clear mask, splitting the mask into
14231   // sub elements and checking to see if each is
14232   // all zeros or all ones - suitable for shuffle masking.
14233   auto BuildClearMask = [&](int Split) {
14234     int NumSubElts = NumElts * Split;
14235     int NumSubBits = RVT.getScalarSizeInBits() / Split;
14236 
14237     SmallVector<int, 8> Indices;
14238     for (int i = 0; i != NumSubElts; ++i) {
14239       int EltIdx = i / Split;
14240       int SubIdx = i % Split;
14241       SDValue Elt = RHS.getOperand(EltIdx);
14242       if (Elt.isUndef()) {
14243         Indices.push_back(-1);
14244         continue;
14245       }
14246 
14247       APInt Bits;
14248       if (isa<ConstantSDNode>(Elt))
14249         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
14250       else if (isa<ConstantFPSDNode>(Elt))
14251         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
14252       else
14253         return SDValue();
14254 
14255       // Extract the sub element from the constant bit mask.
14256       if (DAG.getDataLayout().isBigEndian()) {
14257         Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
14258       } else {
14259         Bits = Bits.lshr(SubIdx * NumSubBits);
14260       }
14261 
14262       if (Split > 1)
14263         Bits = Bits.trunc(NumSubBits);
14264 
14265       if (Bits.isAllOnesValue())
14266         Indices.push_back(i);
14267       else if (Bits == 0)
14268         Indices.push_back(i + NumSubElts);
14269       else
14270         return SDValue();
14271     }
14272 
14273     // Let's see if the target supports this vector_shuffle.
14274     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
14275     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
14276     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
14277       return SDValue();
14278 
14279     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
14280     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
14281                                                    DAG.getBitcast(ClearVT, LHS),
14282                                                    Zero, Indices));
14283   };
14284 
14285   // Determine maximum split level (byte level masking).
14286   int MaxSplit = 1;
14287   if (RVT.getScalarSizeInBits() % 8 == 0)
14288     MaxSplit = RVT.getScalarSizeInBits() / 8;
14289 
14290   for (int Split = 1; Split <= MaxSplit; ++Split)
14291     if (RVT.getScalarSizeInBits() % Split == 0)
14292       if (SDValue S = BuildClearMask(Split))
14293         return S;
14294 
14295   return SDValue();
14296 }
14297 
14298 /// Visit a binary vector operation, like ADD.
14299 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
14300   assert(N->getValueType(0).isVector() &&
14301          "SimplifyVBinOp only works on vectors!");
14302 
14303   SDValue LHS = N->getOperand(0);
14304   SDValue RHS = N->getOperand(1);
14305   SDValue Ops[] = {LHS, RHS};
14306 
14307   // See if we can constant fold the vector operation.
14308   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
14309           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
14310     return Fold;
14311 
14312   // Try to convert a constant mask AND into a shuffle clear mask.
14313   if (SDValue Shuffle = XformToShuffleWithZero(N))
14314     return Shuffle;
14315 
14316   // Type legalization might introduce new shuffles in the DAG.
14317   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
14318   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
14319   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
14320       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
14321       LHS.getOperand(1).isUndef() &&
14322       RHS.getOperand(1).isUndef()) {
14323     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
14324     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
14325 
14326     if (SVN0->getMask().equals(SVN1->getMask())) {
14327       EVT VT = N->getValueType(0);
14328       SDValue UndefVector = LHS.getOperand(1);
14329       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
14330                                      LHS.getOperand(0), RHS.getOperand(0),
14331                                      N->getFlags());
14332       AddUsersToWorklist(N);
14333       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
14334                                   SVN0->getMask());
14335     }
14336   }
14337 
14338   return SDValue();
14339 }
14340 
14341 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
14342                                     SDValue N2) {
14343   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
14344 
14345   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
14346                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
14347 
14348   // If we got a simplified select_cc node back from SimplifySelectCC, then
14349   // break it down into a new SETCC node, and a new SELECT node, and then return
14350   // the SELECT node, since we were called with a SELECT node.
14351   if (SCC.getNode()) {
14352     // Check to see if we got a select_cc back (to turn into setcc/select).
14353     // Otherwise, just return whatever node we got back, like fabs.
14354     if (SCC.getOpcode() == ISD::SELECT_CC) {
14355       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
14356                                   N0.getValueType(),
14357                                   SCC.getOperand(0), SCC.getOperand(1),
14358                                   SCC.getOperand(4));
14359       AddToWorklist(SETCC.getNode());
14360       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
14361                            SCC.getOperand(2), SCC.getOperand(3));
14362     }
14363 
14364     return SCC;
14365   }
14366   return SDValue();
14367 }
14368 
14369 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
14370 /// being selected between, see if we can simplify the select.  Callers of this
14371 /// should assume that TheSelect is deleted if this returns true.  As such, they
14372 /// should return the appropriate thing (e.g. the node) back to the top-level of
14373 /// the DAG combiner loop to avoid it being looked at.
14374 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
14375                                     SDValue RHS) {
14376 
14377   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14378   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
14379   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
14380     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
14381       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
14382       SDValue Sqrt = RHS;
14383       ISD::CondCode CC;
14384       SDValue CmpLHS;
14385       const ConstantFPSDNode *Zero = nullptr;
14386 
14387       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
14388         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
14389         CmpLHS = TheSelect->getOperand(0);
14390         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
14391       } else {
14392         // SELECT or VSELECT
14393         SDValue Cmp = TheSelect->getOperand(0);
14394         if (Cmp.getOpcode() == ISD::SETCC) {
14395           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
14396           CmpLHS = Cmp.getOperand(0);
14397           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
14398         }
14399       }
14400       if (Zero && Zero->isZero() &&
14401           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
14402           CC == ISD::SETULT || CC == ISD::SETLT)) {
14403         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14404         CombineTo(TheSelect, Sqrt);
14405         return true;
14406       }
14407     }
14408   }
14409   // Cannot simplify select with vector condition
14410   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
14411 
14412   // If this is a select from two identical things, try to pull the operation
14413   // through the select.
14414   if (LHS.getOpcode() != RHS.getOpcode() ||
14415       !LHS.hasOneUse() || !RHS.hasOneUse())
14416     return false;
14417 
14418   // If this is a load and the token chain is identical, replace the select
14419   // of two loads with a load through a select of the address to load from.
14420   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
14421   // constants have been dropped into the constant pool.
14422   if (LHS.getOpcode() == ISD::LOAD) {
14423     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
14424     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
14425 
14426     // Token chains must be identical.
14427     if (LHS.getOperand(0) != RHS.getOperand(0) ||
14428         // Do not let this transformation reduce the number of volatile loads.
14429         LLD->isVolatile() || RLD->isVolatile() ||
14430         // FIXME: If either is a pre/post inc/dec load,
14431         // we'd need to split out the address adjustment.
14432         LLD->isIndexed() || RLD->isIndexed() ||
14433         // If this is an EXTLOAD, the VT's must match.
14434         LLD->getMemoryVT() != RLD->getMemoryVT() ||
14435         // If this is an EXTLOAD, the kind of extension must match.
14436         (LLD->getExtensionType() != RLD->getExtensionType() &&
14437          // The only exception is if one of the extensions is anyext.
14438          LLD->getExtensionType() != ISD::EXTLOAD &&
14439          RLD->getExtensionType() != ISD::EXTLOAD) ||
14440         // FIXME: this discards src value information.  This is
14441         // over-conservative. It would be beneficial to be able to remember
14442         // both potential memory locations.  Since we are discarding
14443         // src value info, don't do the transformation if the memory
14444         // locations are not in the default address space.
14445         LLD->getPointerInfo().getAddrSpace() != 0 ||
14446         RLD->getPointerInfo().getAddrSpace() != 0 ||
14447         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
14448                                       LLD->getBasePtr().getValueType()))
14449       return false;
14450 
14451     // Check that the select condition doesn't reach either load.  If so,
14452     // folding this will induce a cycle into the DAG.  If not, this is safe to
14453     // xform, so create a select of the addresses.
14454     SDValue Addr;
14455     if (TheSelect->getOpcode() == ISD::SELECT) {
14456       SDNode *CondNode = TheSelect->getOperand(0).getNode();
14457       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
14458           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
14459         return false;
14460       // The loads must not depend on one another.
14461       if (LLD->isPredecessorOf(RLD) ||
14462           RLD->isPredecessorOf(LLD))
14463         return false;
14464       Addr = DAG.getSelect(SDLoc(TheSelect),
14465                            LLD->getBasePtr().getValueType(),
14466                            TheSelect->getOperand(0), LLD->getBasePtr(),
14467                            RLD->getBasePtr());
14468     } else {  // Otherwise SELECT_CC
14469       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
14470       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
14471 
14472       if ((LLD->hasAnyUseOfValue(1) &&
14473            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
14474           (RLD->hasAnyUseOfValue(1) &&
14475            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
14476         return false;
14477 
14478       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
14479                          LLD->getBasePtr().getValueType(),
14480                          TheSelect->getOperand(0),
14481                          TheSelect->getOperand(1),
14482                          LLD->getBasePtr(), RLD->getBasePtr(),
14483                          TheSelect->getOperand(4));
14484     }
14485 
14486     SDValue Load;
14487     // It is safe to replace the two loads if they have different alignments,
14488     // but the new load must be the minimum (most restrictive) alignment of the
14489     // inputs.
14490     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
14491     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
14492     if (!RLD->isInvariant())
14493       MMOFlags &= ~MachineMemOperand::MOInvariant;
14494     if (!RLD->isDereferenceable())
14495       MMOFlags &= ~MachineMemOperand::MODereferenceable;
14496     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
14497       // FIXME: Discards pointer and AA info.
14498       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
14499                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
14500                          MMOFlags);
14501     } else {
14502       // FIXME: Discards pointer and AA info.
14503       Load = DAG.getExtLoad(
14504           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
14505                                                   : LLD->getExtensionType(),
14506           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
14507           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
14508     }
14509 
14510     // Users of the select now use the result of the load.
14511     CombineTo(TheSelect, Load);
14512 
14513     // Users of the old loads now use the new load's chain.  We know the
14514     // old-load value is dead now.
14515     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
14516     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
14517     return true;
14518   }
14519 
14520   return false;
14521 }
14522 
14523 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
14524 /// where 'cond' is the comparison specified by CC.
14525 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
14526                                       SDValue N2, SDValue N3, ISD::CondCode CC,
14527                                       bool NotExtCompare) {
14528   // (x ? y : y) -> y.
14529   if (N2 == N3) return N2;
14530 
14531   EVT VT = N2.getValueType();
14532   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
14533   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
14534 
14535   // Determine if the condition we're dealing with is constant
14536   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
14537                               N0, N1, CC, DL, false);
14538   if (SCC.getNode()) AddToWorklist(SCC.getNode());
14539 
14540   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
14541     // fold select_cc true, x, y -> x
14542     // fold select_cc false, x, y -> y
14543     return !SCCC->isNullValue() ? N2 : N3;
14544   }
14545 
14546   // Check to see if we can simplify the select into an fabs node
14547   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14548     // Allow either -0.0 or 0.0
14549     if (CFP->isZero()) {
14550       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
14551       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
14552           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
14553           N2 == N3.getOperand(0))
14554         return DAG.getNode(ISD::FABS, DL, VT, N0);
14555 
14556       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
14557       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
14558           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
14559           N2.getOperand(0) == N3)
14560         return DAG.getNode(ISD::FABS, DL, VT, N3);
14561     }
14562   }
14563 
14564   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
14565   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
14566   // in it.  This is a win when the constant is not otherwise available because
14567   // it replaces two constant pool loads with one.  We only do this if the FP
14568   // type is known to be legal, because if it isn't, then we are before legalize
14569   // types an we want the other legalization to happen first (e.g. to avoid
14570   // messing with soft float) and if the ConstantFP is not legal, because if
14571   // it is legal, we may not need to store the FP constant in a constant pool.
14572   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
14573     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
14574       if (TLI.isTypeLegal(N2.getValueType()) &&
14575           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
14576                TargetLowering::Legal &&
14577            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
14578            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
14579           // If both constants have multiple uses, then we won't need to do an
14580           // extra load, they are likely around in registers for other users.
14581           (TV->hasOneUse() || FV->hasOneUse())) {
14582         Constant *Elts[] = {
14583           const_cast<ConstantFP*>(FV->getConstantFPValue()),
14584           const_cast<ConstantFP*>(TV->getConstantFPValue())
14585         };
14586         Type *FPTy = Elts[0]->getType();
14587         const DataLayout &TD = DAG.getDataLayout();
14588 
14589         // Create a ConstantArray of the two constants.
14590         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
14591         SDValue CPIdx =
14592             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
14593                                 TD.getPrefTypeAlignment(FPTy));
14594         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
14595 
14596         // Get the offsets to the 0 and 1 element of the array so that we can
14597         // select between them.
14598         SDValue Zero = DAG.getIntPtrConstant(0, DL);
14599         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
14600         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
14601 
14602         SDValue Cond = DAG.getSetCC(DL,
14603                                     getSetCCResultType(N0.getValueType()),
14604                                     N0, N1, CC);
14605         AddToWorklist(Cond.getNode());
14606         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
14607                                           Cond, One, Zero);
14608         AddToWorklist(CstOffset.getNode());
14609         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
14610                             CstOffset);
14611         AddToWorklist(CPIdx.getNode());
14612         return DAG.getLoad(
14613             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
14614             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
14615             Alignment);
14616       }
14617     }
14618 
14619   // Check to see if we can perform the "gzip trick", transforming
14620   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
14621   if (isNullConstant(N3) && CC == ISD::SETLT &&
14622       (isNullConstant(N1) ||                 // (a < 0) ? b : 0
14623        (isOneConstant(N1) && N0 == N2))) {   // (a < 1) ? a : 0
14624     EVT XType = N0.getValueType();
14625     EVT AType = N2.getValueType();
14626     if (XType.bitsGE(AType)) {
14627       // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
14628       // single-bit constant.
14629       if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
14630         unsigned ShCtV = N2C->getAPIntValue().logBase2();
14631         ShCtV = XType.getSizeInBits() - ShCtV - 1;
14632         SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
14633                                        getShiftAmountTy(N0.getValueType()));
14634         SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
14635                                     XType, N0, ShCt);
14636         AddToWorklist(Shift.getNode());
14637 
14638         if (XType.bitsGT(AType)) {
14639           Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
14640           AddToWorklist(Shift.getNode());
14641         }
14642 
14643         return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
14644       }
14645 
14646       SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
14647                                   XType, N0,
14648                                   DAG.getConstant(XType.getSizeInBits() - 1,
14649                                                   SDLoc(N0),
14650                                          getShiftAmountTy(N0.getValueType())));
14651       AddToWorklist(Shift.getNode());
14652 
14653       if (XType.bitsGT(AType)) {
14654         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
14655         AddToWorklist(Shift.getNode());
14656       }
14657 
14658       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
14659     }
14660   }
14661 
14662   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
14663   // where y is has a single bit set.
14664   // A plaintext description would be, we can turn the SELECT_CC into an AND
14665   // when the condition can be materialized as an all-ones register.  Any
14666   // single bit-test can be materialized as an all-ones register with
14667   // shift-left and shift-right-arith.
14668   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
14669       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
14670     SDValue AndLHS = N0->getOperand(0);
14671     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14672     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
14673       // Shift the tested bit over the sign bit.
14674       const APInt &AndMask = ConstAndRHS->getAPIntValue();
14675       SDValue ShlAmt =
14676         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
14677                         getShiftAmountTy(AndLHS.getValueType()));
14678       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
14679 
14680       // Now arithmetic right shift it all the way over, so the result is either
14681       // all-ones, or zero.
14682       SDValue ShrAmt =
14683         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
14684                         getShiftAmountTy(Shl.getValueType()));
14685       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
14686 
14687       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
14688     }
14689   }
14690 
14691   // fold select C, 16, 0 -> shl C, 4
14692   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
14693       TLI.getBooleanContents(N0.getValueType()) ==
14694           TargetLowering::ZeroOrOneBooleanContent) {
14695 
14696     // If the caller doesn't want us to simplify this into a zext of a compare,
14697     // don't do it.
14698     if (NotExtCompare && N2C->isOne())
14699       return SDValue();
14700 
14701     // Get a SetCC of the condition
14702     // NOTE: Don't create a SETCC if it's not legal on this target.
14703     if (!LegalOperations ||
14704         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
14705       SDValue Temp, SCC;
14706       // cast from setcc result type to select result type
14707       if (LegalTypes) {
14708         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
14709                             N0, N1, CC);
14710         if (N2.getValueType().bitsLT(SCC.getValueType()))
14711           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
14712                                         N2.getValueType());
14713         else
14714           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
14715                              N2.getValueType(), SCC);
14716       } else {
14717         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
14718         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
14719                            N2.getValueType(), SCC);
14720       }
14721 
14722       AddToWorklist(SCC.getNode());
14723       AddToWorklist(Temp.getNode());
14724 
14725       if (N2C->isOne())
14726         return Temp;
14727 
14728       // shl setcc result by log2 n2c
14729       return DAG.getNode(
14730           ISD::SHL, DL, N2.getValueType(), Temp,
14731           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
14732                           getShiftAmountTy(Temp.getValueType())));
14733     }
14734   }
14735 
14736   // Check to see if this is an integer abs.
14737   // select_cc setg[te] X,  0,  X, -X ->
14738   // select_cc setgt    X, -1,  X, -X ->
14739   // select_cc setl[te] X,  0, -X,  X ->
14740   // select_cc setlt    X,  1, -X,  X ->
14741   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
14742   if (N1C) {
14743     ConstantSDNode *SubC = nullptr;
14744     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
14745          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
14746         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
14747       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
14748     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
14749               (N1C->isOne() && CC == ISD::SETLT)) &&
14750              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
14751       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
14752 
14753     EVT XType = N0.getValueType();
14754     if (SubC && SubC->isNullValue() && XType.isInteger()) {
14755       SDLoc DL(N0);
14756       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
14757                                   N0,
14758                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
14759                                          getShiftAmountTy(N0.getValueType())));
14760       SDValue Add = DAG.getNode(ISD::ADD, DL,
14761                                 XType, N0, Shift);
14762       AddToWorklist(Shift.getNode());
14763       AddToWorklist(Add.getNode());
14764       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
14765     }
14766   }
14767 
14768   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
14769   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
14770   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
14771   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
14772   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
14773   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
14774   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
14775   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
14776   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
14777     SDValue ValueOnZero = N2;
14778     SDValue Count = N3;
14779     // If the condition is NE instead of E, swap the operands.
14780     if (CC == ISD::SETNE)
14781       std::swap(ValueOnZero, Count);
14782     // Check if the value on zero is a constant equal to the bits in the type.
14783     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
14784       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
14785         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
14786         // legal, combine to just cttz.
14787         if ((Count.getOpcode() == ISD::CTTZ ||
14788              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
14789             N0 == Count.getOperand(0) &&
14790             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
14791           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
14792         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
14793         // legal, combine to just ctlz.
14794         if ((Count.getOpcode() == ISD::CTLZ ||
14795              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
14796             N0 == Count.getOperand(0) &&
14797             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
14798           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
14799       }
14800     }
14801   }
14802 
14803   return SDValue();
14804 }
14805 
14806 /// This is a stub for TargetLowering::SimplifySetCC.
14807 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
14808                                    ISD::CondCode Cond, const SDLoc &DL,
14809                                    bool foldBooleans) {
14810   TargetLowering::DAGCombinerInfo
14811     DagCombineInfo(DAG, Level, false, this);
14812   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
14813 }
14814 
14815 /// Given an ISD::SDIV node expressing a divide by constant, return
14816 /// a DAG expression to select that will generate the same value by multiplying
14817 /// by a magic number.
14818 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14819 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
14820   // when optimising for minimum size, we don't want to expand a div to a mul
14821   // and a shift.
14822   if (DAG.getMachineFunction().getFunction()->optForMinSize())
14823     return SDValue();
14824 
14825   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14826   if (!C)
14827     return SDValue();
14828 
14829   // Avoid division by zero.
14830   if (C->isNullValue())
14831     return SDValue();
14832 
14833   std::vector<SDNode*> Built;
14834   SDValue S =
14835       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14836 
14837   for (SDNode *N : Built)
14838     AddToWorklist(N);
14839   return S;
14840 }
14841 
14842 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
14843 /// DAG expression that will generate the same value by right shifting.
14844 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
14845   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14846   if (!C)
14847     return SDValue();
14848 
14849   // Avoid division by zero.
14850   if (C->isNullValue())
14851     return SDValue();
14852 
14853   std::vector<SDNode *> Built;
14854   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
14855 
14856   for (SDNode *N : Built)
14857     AddToWorklist(N);
14858   return S;
14859 }
14860 
14861 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
14862 /// expression that will generate the same value by multiplying by a magic
14863 /// number.
14864 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14865 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
14866   // when optimising for minimum size, we don't want to expand a div to a mul
14867   // and a shift.
14868   if (DAG.getMachineFunction().getFunction()->optForMinSize())
14869     return SDValue();
14870 
14871   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14872   if (!C)
14873     return SDValue();
14874 
14875   // Avoid division by zero.
14876   if (C->isNullValue())
14877     return SDValue();
14878 
14879   std::vector<SDNode*> Built;
14880   SDValue S =
14881       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14882 
14883   for (SDNode *N : Built)
14884     AddToWorklist(N);
14885   return S;
14886 }
14887 
14888 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
14889   if (Level >= AfterLegalizeDAG)
14890     return SDValue();
14891 
14892   // Expose the DAG combiner to the target combiner implementations.
14893   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
14894 
14895   unsigned Iterations = 0;
14896   if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
14897     if (Iterations) {
14898       // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14899       // For the reciprocal, we need to find the zero of the function:
14900       //   F(X) = A X - 1 [which has a zero at X = 1/A]
14901       //     =>
14902       //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
14903       //     does not require additional intermediate precision]
14904       EVT VT = Op.getValueType();
14905       SDLoc DL(Op);
14906       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14907 
14908       AddToWorklist(Est.getNode());
14909 
14910       // Newton iterations: Est = Est + Est (1 - Arg * Est)
14911       for (unsigned i = 0; i < Iterations; ++i) {
14912         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
14913         AddToWorklist(NewEst.getNode());
14914 
14915         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
14916         AddToWorklist(NewEst.getNode());
14917 
14918         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14919         AddToWorklist(NewEst.getNode());
14920 
14921         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
14922         AddToWorklist(Est.getNode());
14923       }
14924     }
14925     return Est;
14926   }
14927 
14928   return SDValue();
14929 }
14930 
14931 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14932 /// For the reciprocal sqrt, we need to find the zero of the function:
14933 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14934 ///     =>
14935 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
14936 /// As a result, we precompute A/2 prior to the iteration loop.
14937 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
14938                                          unsigned Iterations,
14939                                          SDNodeFlags *Flags, bool Reciprocal) {
14940   EVT VT = Arg.getValueType();
14941   SDLoc DL(Arg);
14942   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
14943 
14944   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
14945   // this entire sequence requires only one FP constant.
14946   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
14947   AddToWorklist(HalfArg.getNode());
14948 
14949   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
14950   AddToWorklist(HalfArg.getNode());
14951 
14952   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
14953   for (unsigned i = 0; i < Iterations; ++i) {
14954     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
14955     AddToWorklist(NewEst.getNode());
14956 
14957     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
14958     AddToWorklist(NewEst.getNode());
14959 
14960     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
14961     AddToWorklist(NewEst.getNode());
14962 
14963     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14964     AddToWorklist(Est.getNode());
14965   }
14966 
14967   // If non-reciprocal square root is requested, multiply the result by Arg.
14968   if (!Reciprocal) {
14969     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
14970     AddToWorklist(Est.getNode());
14971   }
14972 
14973   return Est;
14974 }
14975 
14976 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14977 /// For the reciprocal sqrt, we need to find the zero of the function:
14978 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14979 ///     =>
14980 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
14981 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
14982                                          unsigned Iterations,
14983                                          SDNodeFlags *Flags, bool Reciprocal) {
14984   EVT VT = Arg.getValueType();
14985   SDLoc DL(Arg);
14986   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
14987   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
14988 
14989   // This routine must enter the loop below to work correctly
14990   // when (Reciprocal == false).
14991   assert(Iterations > 0);
14992 
14993   // Newton iterations for reciprocal square root:
14994   // E = (E * -0.5) * ((A * E) * E + -3.0)
14995   for (unsigned i = 0; i < Iterations; ++i) {
14996     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
14997     AddToWorklist(AE.getNode());
14998 
14999     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
15000     AddToWorklist(AEE.getNode());
15001 
15002     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
15003     AddToWorklist(RHS.getNode());
15004 
15005     // When calculating a square root at the last iteration build:
15006     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
15007     // (notice a common subexpression)
15008     SDValue LHS;
15009     if (Reciprocal || (i + 1) < Iterations) {
15010       // RSQRT: LHS = (E * -0.5)
15011       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
15012     } else {
15013       // SQRT: LHS = (A * E) * -0.5
15014       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
15015     }
15016     AddToWorklist(LHS.getNode());
15017 
15018     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
15019     AddToWorklist(Est.getNode());
15020   }
15021 
15022   return Est;
15023 }
15024 
15025 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
15026 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
15027 /// Op can be zero.
15028 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
15029                                            bool Reciprocal) {
15030   if (Level >= AfterLegalizeDAG)
15031     return SDValue();
15032 
15033   // Expose the DAG combiner to the target combiner implementations.
15034   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
15035   unsigned Iterations = 0;
15036   bool UseOneConstNR = false;
15037   if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
15038     AddToWorklist(Est.getNode());
15039     if (Iterations) {
15040       Est = UseOneConstNR
15041                 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
15042                 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
15043     }
15044     return Est;
15045   }
15046 
15047   return SDValue();
15048 }
15049 
15050 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15051   return buildSqrtEstimateImpl(Op, Flags, true);
15052 }
15053 
15054 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15055   SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
15056   if (!Est)
15057     return SDValue();
15058 
15059   // Unfortunately, Est is now NaN if the input was exactly 0.
15060   // Select out this case and force the answer to 0.
15061   EVT VT = Est.getValueType();
15062   SDLoc DL(Op);
15063   SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
15064   EVT CCVT = getSetCCResultType(VT);
15065   SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
15066   AddToWorklist(ZeroCmp.getNode());
15067 
15068   Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
15069                     Zero, Est);
15070   AddToWorklist(Est.getNode());
15071   return Est;
15072 }
15073 
15074 /// Return true if base is a frame index, which is known not to alias with
15075 /// anything but itself.  Provides base object and offset as results.
15076 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
15077                            const GlobalValue *&GV, const void *&CV) {
15078   // Assume it is a primitive operation.
15079   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
15080 
15081   // If it's an adding a simple constant then integrate the offset.
15082   if (Base.getOpcode() == ISD::ADD) {
15083     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
15084       Base = Base.getOperand(0);
15085       Offset += C->getZExtValue();
15086     }
15087   }
15088 
15089   // Return the underlying GlobalValue, and update the Offset.  Return false
15090   // for GlobalAddressSDNode since the same GlobalAddress may be represented
15091   // by multiple nodes with different offsets.
15092   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
15093     GV = G->getGlobal();
15094     Offset += G->getOffset();
15095     return false;
15096   }
15097 
15098   // Return the underlying Constant value, and update the Offset.  Return false
15099   // for ConstantSDNodes since the same constant pool entry may be represented
15100   // by multiple nodes with different offsets.
15101   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
15102     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
15103                                          : (const void *)C->getConstVal();
15104     Offset += C->getOffset();
15105     return false;
15106   }
15107   // If it's any of the following then it can't alias with anything but itself.
15108   return isa<FrameIndexSDNode>(Base);
15109 }
15110 
15111 /// Return true if there is any possibility that the two addresses overlap.
15112 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
15113   // If they are the same then they must be aliases.
15114   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
15115 
15116   // If they are both volatile then they cannot be reordered.
15117   if (Op0->isVolatile() && Op1->isVolatile()) return true;
15118 
15119   // If one operation reads from invariant memory, and the other may store, they
15120   // cannot alias. These should really be checking the equivalent of mayWrite,
15121   // but it only matters for memory nodes other than load /store.
15122   if (Op0->isInvariant() && Op1->writeMem())
15123     return false;
15124 
15125   if (Op1->isInvariant() && Op0->writeMem())
15126     return false;
15127 
15128   // Gather base node and offset information.
15129   SDValue Base1, Base2;
15130   int64_t Offset1, Offset2;
15131   const GlobalValue *GV1, *GV2;
15132   const void *CV1, *CV2;
15133   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
15134                                       Base1, Offset1, GV1, CV1);
15135   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
15136                                       Base2, Offset2, GV2, CV2);
15137 
15138   // If they have a same base address then check to see if they overlap.
15139   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
15140     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
15141              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
15142 
15143   // It is possible for different frame indices to alias each other, mostly
15144   // when tail call optimization reuses return address slots for arguments.
15145   // To catch this case, look up the actual index of frame indices to compute
15146   // the real alias relationship.
15147   if (isFrameIndex1 && isFrameIndex2) {
15148     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
15149     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
15150     Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
15151     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
15152              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
15153   }
15154 
15155   // Otherwise, if we know what the bases are, and they aren't identical, then
15156   // we know they cannot alias.
15157   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
15158     return false;
15159 
15160   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
15161   // compared to the size and offset of the access, we may be able to prove they
15162   // do not alias.  This check is conservative for now to catch cases created by
15163   // splitting vector types.
15164   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
15165       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
15166       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
15167        Op1->getMemoryVT().getSizeInBits() >> 3) &&
15168       (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
15169     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
15170     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
15171 
15172     // There is no overlap between these relatively aligned accesses of similar
15173     // size, return no alias.
15174     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
15175         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
15176       return false;
15177   }
15178 
15179   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
15180                    ? CombinerGlobalAA
15181                    : DAG.getSubtarget().useAA();
15182 #ifndef NDEBUG
15183   if (CombinerAAOnlyFunc.getNumOccurrences() &&
15184       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
15185     UseAA = false;
15186 #endif
15187   if (UseAA &&
15188       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
15189     // Use alias analysis information.
15190     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
15191                                  Op1->getSrcValueOffset());
15192     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
15193         Op0->getSrcValueOffset() - MinOffset;
15194     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
15195         Op1->getSrcValueOffset() - MinOffset;
15196     AliasResult AAResult =
15197         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
15198                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
15199                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
15200                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
15201     if (AAResult == NoAlias)
15202       return false;
15203   }
15204 
15205   // Otherwise we have to assume they alias.
15206   return true;
15207 }
15208 
15209 /// Walk up chain skipping non-aliasing memory nodes,
15210 /// looking for aliasing nodes and adding them to the Aliases vector.
15211 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
15212                                    SmallVectorImpl<SDValue> &Aliases) {
15213   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
15214   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
15215 
15216   // Get alias information for node.
15217   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
15218 
15219   // Starting off.
15220   Chains.push_back(OriginalChain);
15221   unsigned Depth = 0;
15222 
15223   // Look at each chain and determine if it is an alias.  If so, add it to the
15224   // aliases list.  If not, then continue up the chain looking for the next
15225   // candidate.
15226   while (!Chains.empty()) {
15227     SDValue Chain = Chains.pop_back_val();
15228 
15229     // For TokenFactor nodes, look at each operand and only continue up the
15230     // chain until we reach the depth limit.
15231     //
15232     // FIXME: The depth check could be made to return the last non-aliasing
15233     // chain we found before we hit a tokenfactor rather than the original
15234     // chain.
15235     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
15236       Aliases.clear();
15237       Aliases.push_back(OriginalChain);
15238       return;
15239     }
15240 
15241     // Don't bother if we've been before.
15242     if (!Visited.insert(Chain.getNode()).second)
15243       continue;
15244 
15245     switch (Chain.getOpcode()) {
15246     case ISD::EntryToken:
15247       // Entry token is ideal chain operand, but handled in FindBetterChain.
15248       break;
15249 
15250     case ISD::LOAD:
15251     case ISD::STORE: {
15252       // Get alias information for Chain.
15253       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
15254           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
15255 
15256       // If chain is alias then stop here.
15257       if (!(IsLoad && IsOpLoad) &&
15258           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
15259         Aliases.push_back(Chain);
15260       } else {
15261         // Look further up the chain.
15262         Chains.push_back(Chain.getOperand(0));
15263         ++Depth;
15264       }
15265       break;
15266     }
15267 
15268     case ISD::TokenFactor:
15269       // We have to check each of the operands of the token factor for "small"
15270       // token factors, so we queue them up.  Adding the operands to the queue
15271       // (stack) in reverse order maintains the original order and increases the
15272       // likelihood that getNode will find a matching token factor (CSE.)
15273       if (Chain.getNumOperands() > 16) {
15274         Aliases.push_back(Chain);
15275         break;
15276       }
15277       for (unsigned n = Chain.getNumOperands(); n;)
15278         Chains.push_back(Chain.getOperand(--n));
15279       ++Depth;
15280       break;
15281 
15282     default:
15283       // For all other instructions we will just have to take what we can get.
15284       Aliases.push_back(Chain);
15285       break;
15286     }
15287   }
15288 }
15289 
15290 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
15291 /// (aliasing node.)
15292 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
15293   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
15294 
15295   // Accumulate all the aliases to this node.
15296   GatherAllAliases(N, OldChain, Aliases);
15297 
15298   // If no operands then chain to entry token.
15299   if (Aliases.size() == 0)
15300     return DAG.getEntryNode();
15301 
15302   // If a single operand then chain to it.  We don't need to revisit it.
15303   if (Aliases.size() == 1)
15304     return Aliases[0];
15305 
15306   // Construct a custom tailored token factor.
15307   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
15308 }
15309 
15310 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
15311   // This holds the base pointer, index, and the offset in bytes from the base
15312   // pointer.
15313   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
15314 
15315   // We must have a base and an offset.
15316   if (!BasePtr.Base.getNode())
15317     return false;
15318 
15319   // Do not handle stores to undef base pointers.
15320   if (BasePtr.Base.isUndef())
15321     return false;
15322 
15323   SmallVector<StoreSDNode *, 8> ChainedStores;
15324   ChainedStores.push_back(St);
15325 
15326   // Walk up the chain and look for nodes with offsets from the same
15327   // base pointer. Stop when reaching an instruction with a different kind
15328   // or instruction which has a different base pointer.
15329   StoreSDNode *Index = St;
15330   while (Index) {
15331     // If the chain has more than one use, then we can't reorder the mem ops.
15332     if (Index != St && !SDValue(Index, 0)->hasOneUse())
15333       break;
15334 
15335     if (Index->isVolatile() || Index->isIndexed())
15336       break;
15337 
15338     // Find the base pointer and offset for this memory node.
15339     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
15340 
15341     // Check that the base pointer is the same as the original one.
15342     if (!Ptr.equalBaseIndex(BasePtr))
15343       break;
15344 
15345     // Find the next memory operand in the chain. If the next operand in the
15346     // chain is a store then move up and continue the scan with the next
15347     // memory operand. If the next operand is a load save it and use alias
15348     // information to check if it interferes with anything.
15349     SDNode *NextInChain = Index->getChain().getNode();
15350     while (true) {
15351       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
15352         // We found a store node. Use it for the next iteration.
15353         if (STn->isVolatile() || STn->isIndexed()) {
15354           Index = nullptr;
15355           break;
15356         }
15357         ChainedStores.push_back(STn);
15358         Index = STn;
15359         break;
15360       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
15361         NextInChain = Ldn->getChain().getNode();
15362         continue;
15363       } else {
15364         Index = nullptr;
15365         break;
15366       }
15367     }
15368   }
15369 
15370   bool MadeChangeToSt = false;
15371   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
15372 
15373   for (StoreSDNode *ChainedStore : ChainedStores) {
15374     SDValue Chain = ChainedStore->getChain();
15375     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
15376 
15377     if (Chain != BetterChain) {
15378       if (ChainedStore == St)
15379         MadeChangeToSt = true;
15380       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
15381     }
15382   }
15383 
15384   // Do all replacements after finding the replacements to make to avoid making
15385   // the chains more complicated by introducing new TokenFactors.
15386   for (auto Replacement : BetterChains)
15387     replaceStoreChain(Replacement.first, Replacement.second);
15388 
15389   return MadeChangeToSt;
15390 }
15391 
15392 /// This is the entry point for the file.
15393 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
15394                            CodeGenOpt::Level OptLevel) {
15395   /// This is the main entry point to this class.
15396   DAGCombiner(*this, AA, OptLevel).Run(Level);
15397 }
15398