1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallBitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/Target/TargetLowering.h"
37 #include "llvm/Target/TargetOptions.h"
38 #include "llvm/Target/TargetRegisterInfo.h"
39 #include "llvm/Target/TargetSubtargetInfo.h"
40 #include <algorithm>
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "dagcombine"
44 
// Pass statistics (see llvm/ADT/Statistic.h), reporting how often each of the
// major transformations below fired.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
51 
52 namespace {
  // -combiner-alias-analysis: enable the combiner's own chain alias-analysis
  // heuristics for memory operations.
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner alias-analysis heuristics"));

  // -combiner-global-alias-analysis: additionally let the combiner query
  // IR-level alias analysis.
  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  // -combiner-use-tbaa (default on): allow TBAA metadata in AA queries.
  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  // -combiner-aa-only-func (assert builds only): restrict combiner alias
  // analysis to the named function, for debugging.
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  // -combiner-split-load-index (default on): permit splitting the index
  // arithmetic out of indexed loads.
  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));
83 
84 //------------------------------ DAGCombiner ---------------------------------//
85 
  /// The core DAG combiner: a worklist-driven pass over the nodes of a
  /// SelectionDAG that repeatedly folds nodes into simpler forms via the
  /// visit* routines declared below.
  class DAGCombiner {
    /// The DAG being combined.
    SelectionDAG &DAG;
    /// Target lowering hooks, cached from DAG in the constructor.
    const TargetLowering &TLI;
    /// Current combine phase; initialized to BeforeLegalizeTypes in the
    /// constructor.
    CombineLevel Level;
    /// Codegen optimization level this combiner was constructed with.
    CodeGenOpt::Level OptLevel;
    /// When true, newly created operations must be legal for the target
    /// (see e.g. the FSUB legality check in isNegatibleForFree). Starts false.
    bool LegalOperations;
    /// When true, type legalization has run; gates isTypeLegal() and
    /// getShiftAmountTy() below. Starts false.
    bool LegalTypes;
    /// True when the enclosing function is marked optForSize().
    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // Only push if N was not already present; WorklistMap records the index
      // the node will occupy so removeFromWorklist can null it out later.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      // Demand every bit of the scalar type and defer to the overload below.
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    /// Overload that only considers the bits set in \p Demanded.
    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMACombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
                             SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          SDLoc DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                 SDNodeFlags *Flags);
    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                 SDNodeFlags *Flags);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              SDLoc DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Do FindBetterChain for a store and any possibly adjacent stores on
    /// consecutive chains.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
      // What is the sequence number of this mem node.
      // Lowest mem operand in the DAG starts at zero.
      unsigned SequenceNum;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    /// constant build_vector of the stored constant values in Stores.
    SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
                                         SDLoc SL,
                                         ArrayRef<MemOpLink> Stores,
                                         SmallVectorImpl<SDValue> &Chains,
                                         EVT Ty) const;

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// This is a helper function for MergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store.
    /// \return True if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector);

    /// This is a helper function for MergeConsecutiveStores.
    /// Stores that may be merged are placed in StoreNodes.
    /// Loads that may alias with those stores are placed in AliasLoadNodes.
    void getStoreMergeAndAliasCandidates(
        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
499 }
500 
501 
502 namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist. While an instance is alive, the owning
/// SelectionDAG notifies it of every node deletion.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC; // Combiner whose worklist is kept in sync.
public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  /// Called when \p N is deleted from the DAG; \p E is unused here.
  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
515 }
516 
517 //===----------------------------------------------------------------------===//
518 //  TargetLowering::DAGCombinerInfo implementation
519 //===----------------------------------------------------------------------===//
520 
521 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
522   ((DAGCombiner*)DC)->AddToWorklist(N);
523 }
524 
525 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
526   ((DAGCombiner*)DC)->removeFromWorklist(N);
527 }
528 
529 SDValue TargetLowering::DAGCombinerInfo::
530 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
531   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
532 }
533 
534 SDValue TargetLowering::DAGCombinerInfo::
535 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
536   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
537 }
538 
539 
540 SDValue TargetLowering::DAGCombinerInfo::
541 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
542   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
543 }
544 
545 void TargetLowering::DAGCombinerInfo::
546 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
547   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
548 }
549 
550 //===----------------------------------------------------------------------===//
551 // Helper Functions
552 //===----------------------------------------------------------------------===//
553 
554 void DAGCombiner::deleteAndRecombine(SDNode *N) {
555   removeFromWorklist(N);
556 
557   // If the operands of this node are only used by the node, they will now be
558   // dead. Make sure to re-visit them and recursively delete dead nodes.
559   for (const SDValue &Op : N->ops())
560     // For an operand generating multiple values, one of the values may
561     // become dead allowing further simplification (e.g. split index
562     // arithmetic from an indexed load).
563     if (Op->hasOneUse() || Op->getNumValues() > 1)
564       AddToWorklist(Op.getNode());
565 
566   DAG.DeleteNode(N);
567 }
568 
569 /// Return 1 if we can compute the negated form of the specified expression for
570 /// the same cost as the expression itself, or 2 if we can compute the negated
571 /// form more cheaply than the expression itself.
572 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
573                                const TargetLowering &TLI,
574                                const TargetOptions *Options,
575                                unsigned Depth = 0) {
576   // fneg is removable even if it has multiple uses.
577   if (Op.getOpcode() == ISD::FNEG) return 2;
578 
579   // Don't allow anything with multiple uses.
580   if (!Op.hasOneUse()) return 0;
581 
582   // Don't recurse exponentially.
583   if (Depth > 6) return 0;
584 
585   switch (Op.getOpcode()) {
586   default: return false;
587   case ISD::ConstantFP:
588     // Don't invert constant FP values after legalize.  The negated constant
589     // isn't necessarily legal.
590     return LegalOperations ? 0 : 1;
591   case ISD::FADD:
592     // FIXME: determine better conditions for this xform.
593     if (!Options->UnsafeFPMath) return 0;
594 
595     // After operation legalization, it might not be legal to create new FSUBs.
596     if (LegalOperations &&
597         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
598       return 0;
599 
600     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
601     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
602                                     Options, Depth + 1))
603       return V;
604     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
605     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
606                               Depth + 1);
607   case ISD::FSUB:
608     // We can't turn -(A-B) into B-A when we honor signed zeros.
609     if (!Options->UnsafeFPMath) return 0;
610 
611     // fold (fneg (fsub A, B)) -> (fsub B, A)
612     return 1;
613 
614   case ISD::FMUL:
615   case ISD::FDIV:
616     if (Options->HonorSignDependentRoundingFPMath()) return 0;
617 
618     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
619     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
620                                     Options, Depth + 1))
621       return V;
622 
623     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
624                               Depth + 1);
625 
626   case ISD::FP_EXTEND:
627   case ISD::FP_ROUND:
628   case ISD::FSIN:
629     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
630                               Depth + 1);
631   }
632 }
633 
/// If isNegatibleForFree returns true, return the newly negated expression.
///
/// \pre isNegatibleForFree returned nonzero for \p Op with the same
/// LegalOperations/Depth; the asserts below re-check the invariants that
/// routine established, and its message makes explicit that this switch must
/// stay in sync with isNegatibleForFree's cases.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Propagate the original node's flags onto every replacement built below.
  const SDNodeFlags *Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Push the negation through the unary operation.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND carries a second (truncation-mode) operand; preserve it.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
714 
715 // Return true if this node is a setcc, or is a select_cc
716 // that selects between the target values used for true and false, making it
717 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
718 // the appropriate nodes based on the type of node we are checking. This
719 // simplifies life a bit for the callers.
720 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
721                                     SDValue &CC) const {
722   if (N.getOpcode() == ISD::SETCC) {
723     LHS = N.getOperand(0);
724     RHS = N.getOperand(1);
725     CC  = N.getOperand(2);
726     return true;
727   }
728 
729   if (N.getOpcode() != ISD::SELECT_CC ||
730       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
731       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
732     return false;
733 
734   if (TLI.getBooleanContents(N.getValueType()) ==
735       TargetLowering::UndefinedBooleanContent)
736     return false;
737 
738   LHS = N.getOperand(0);
739   RHS = N.getOperand(1);
740   CC  = N.getOperand(4);
741   return true;
742 }
743 
744 /// Return true if this is a SetCC-equivalent operation with only one use.
745 /// If this is true, it allows the users to invert the operation for free when
746 /// it is profitable to do so.
747 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
748   SDValue N0, N1, N2;
749   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
750     return true;
751   return false;
752 }
753 
754 /// Returns true if N is a BUILD_VECTOR node whose
755 /// elements are all the same constant or undefined.
756 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
757   BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
758   if (!C)
759     return false;
760 
761   APInt SplatUndef;
762   unsigned SplatBitSize;
763   bool HasAnyUndefs;
764   EVT EltVT = N->getValueType(0).getVectorElementType();
765   return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
766                              HasAnyUndefs) &&
767           EltVT.getSizeInBits() >= SplatBitSize);
768 }
769 
770 // \brief Returns the SDNode if it is a constant float BuildVector
771 // or constant float.
772 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
773   if (isa<ConstantFPSDNode>(N))
774     return N.getNode();
775   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
776     return N.getNode();
777   return nullptr;
778 }
779 
780 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
781 // int.
782 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
783   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
784     return CN;
785 
786   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
787     BitVector UndefElements;
788     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
789 
790     // BuildVectors can truncate their operands. Ignore that case here.
791     // FIXME: We blindly ignore splats which include undef which is overly
792     // pessimistic.
793     if (CN && UndefElements.none() &&
794         CN->getValueType(0) == N.getValueType().getScalarType())
795       return CN;
796   }
797 
798   return nullptr;
799 }
800 
801 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
802 // float.
803 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
804   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
805     return CN;
806 
807   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
808     BitVector UndefElements;
809     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
810 
811     if (CN && UndefElements.none())
812       return CN;
813   }
814 
815   return nullptr;
816 }
817 
/// Try to reassociate a commutative/associative binary operation \p Opc over
/// \p N0 and \p N1 so that constants fold together or migrate outward:
///   (op (op x, c1), c2) -> (op x, (op c1, c2))
///   (op (op x, c1), y)  -> (op (op x, y), c1)   [iff the inner op has one use]
/// Returns a null SDValue when no reassociation applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                    SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();
  // Case 1: the constant-bearing inner op is the LHS.
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both operands were constants but didn't fold; give up.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Case 2: mirror image of the above with the inner op on the RHS.
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
863 
/// Replace all of \p N's result values with the \p NumTo values in \p To,
/// keeping the combiner worklist consistent, and delete \p N if it becomes
/// dead. Returns SDValue(N, 0) as the conventional "a combine happened"
/// marker understood by the Run() loop.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // The remover keeps the worklist in sync while RAUW may delete nodes.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
897 
/// Apply a replacement computed by TargetLowering (TLO.Old -> TLO.New) to the
/// DAG, keeping the combiner worklist consistent and deleting TLO.Old if it
/// becomes dead.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
915 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  // Delegate the actual bit analysis and replacement construction to
  // TargetLowering; TLO records the Old -> New replacement on success.
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
938 
/// Replace all uses of \p Load with the promoted load \p ExtLoad: the value
/// result is replaced by a TRUNCATE of ExtLoad's value back to Load's type,
/// and the chain result is rewired to ExtLoad's chain. \p Load is then
/// deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
955 
/// Promote \p Op to the wider type \p PVT, returning a null SDValue if the
/// promotion cannot be done. \p Replace is set to true when the result is an
/// extending load that must replace the original load via
/// ReplaceLoadWithPromotedLoad (the caller is responsible for doing so).
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    // Turn the load into an extending load of the same memory type. For a
    // non-extending load, prefer ZEXTLOAD when it is legal, otherwise fall
    // back to EXTLOAD; an existing ext-load keeps its extension kind.
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Keep the assertion, promoting the asserted value sign-extended.
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    // Keep the assertion, promoting the asserted value zero-extended.
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    // Byte-sized constants are sign-extended, others zero-extended.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  // Anything else: promote with an ANY_EXTEND if the target supports it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}
993 
/// Promote \p Op to \p PVT and sign-extend it in-register from its old type,
/// so the promoted value carries the correct signed value in the low bits.
/// Returns a null SDValue if SIGN_EXTEND_INREG is not legal for PVT or the
/// operand cannot be promoted.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand produced a promoted load; commit the load replacement.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1010 
/// Promote \p Op to \p PVT and zero-extend it in-register (mask down to the
/// old type's bits). Returns a null SDValue if the operand cannot be promoted.
/// NOTE(review): unlike SExtPromoteOperand, this does not check legality of
/// the masking operation (AND) for PVT up front — presumably AND is assumed
/// cheap/legal on targets reaching here; confirm.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand produced a promoted load; commit the load replacement.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
1024 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion only applies once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Promote both operands; bail out entirely if either fails.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (!NN0.getNode())
      return SDValue();

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    if (N0 == N1)
      // Identical operands share one promoted node.
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (!NN1.getNode())
        return SDValue();
    }

    AddToWorklist(NN0.getNode());
    if (NN1.getNode())
      AddToWorklist(NN1.getNode());

    // Commit load replacements only after both promotions succeeded.
    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Perform the op in the wider type, then truncate back to VT.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}
1082 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  // Promotion only applies once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Only the shifted value (operand 0) is promoted; the shift amount
    // (operand 1) is left as-is. SRA needs the high bits sign-extended and
    // SRL needs them zeroed for the result to be correct; SHL doesn't care.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (!N0.getNode())
      return SDValue();

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Perform the shift in the wider type, then truncate back to VT.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}
1129 
1130 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1131   if (!LegalOperations)
1132     return SDValue();
1133 
1134   EVT VT = Op.getValueType();
1135   if (VT.isVector() || !VT.isInteger())
1136     return SDValue();
1137 
1138   // If operation type is 'undesirable', e.g. i16 on x86, consider
1139   // promoting it.
1140   unsigned Opc = Op.getOpcode();
1141   if (TLI.isTypeDesirableForOp(Opc, VT))
1142     return SDValue();
1143 
1144   EVT PVT = VT;
1145   // Consult target whether it is a good idea to promote this operation and
1146   // what's the right type to promote it to.
1147   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1148     assert(PVT != VT && "Don't know what type to promote to!");
1149     // fold (aext (aext x)) -> (aext x)
1150     // fold (aext (zext x)) -> (zext x)
1151     // fold (aext (sext x)) -> (sext x)
1152     DEBUG(dbgs() << "\nPromoting ";
1153           Op.getNode()->dump(&DAG));
1154     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1155   }
1156   return SDValue();
1157 }
1158 
/// Promote a load of an 'undesirable' integer type to an extending load of the
/// wider type the target prefers, truncating the result back for existing
/// users. Replaces the original load's value and chain uses and deletes it.
/// Returns true if the replacement was performed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  // Promotion only applies once operations have been legalized.
  if (!LegalOperations)
    return false;

  // Only scalar integer loads are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc dl(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    // Choose the extension kind: non-extending loads become ZEXTLOAD when
    // legal (EXTLOAD otherwise); ext-loads keep their extension kind.
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    // Existing users still expect VT, so hand them a truncate of the wide
    // load's value.
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1206 
1207 /// \brief Recursively delete a node which has no uses and any operands for
1208 /// which it is the only use.
1209 ///
1210 /// Note that this both deletes the nodes and removes them from the worklist.
1211 /// It also adds any nodes who have had a user deleted to the worklist as they
1212 /// may now have only one use and subject to other combines.
1213 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1214   if (!N->use_empty())
1215     return false;
1216 
1217   SmallSetVector<SDNode *, 16> Nodes;
1218   Nodes.insert(N);
1219   do {
1220     N = Nodes.pop_back_val();
1221     if (!N)
1222       continue;
1223 
1224     if (N->use_empty()) {
1225       for (const SDValue &ChildN : N->op_values())
1226         Nodes.insert(ChildN.getNode());
1227 
1228       removeFromWorklist(N);
1229       DAG.DeleteNode(N);
1230     } else {
1231       AddToWorklist(N);
1232     }
1233   } while (!Nodes.empty());
1234   return true;
1235 }
1236 
1237 //===----------------------------------------------------------------------===//
1238 //  Main DAG Combiner implementation
1239 //===----------------------------------------------------------------------===//
1240 
/// Main driver: repeatedly pop nodes off the worklist and try to combine each
/// one until the worklist is empty, replacing combined nodes and cleaning up
/// dead ones as it goes.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // while the worklist isn't empty, find a node and
  // try and combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced N itself; nothing more to do with it.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // Null result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Transfer debug value.
    DAG.TransferDbgValues(SDValue(N, 0), RV);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // The combined node has fewer results; this is only valid for
      // single-result N being replaced by RV's first result.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1349 
/// Dispatch \p N to the opcode-specific visit routine. Returns a null SDValue
/// when there is no routine for the opcode (the default case); otherwise
/// returns whatever the routine produced.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1447 
/// Run all applicable combines on \p N, in order of decreasing specificity:
/// the opcode-specific visit routine, then target-specific combines, then
/// integer type promotion, and finally CSE via operand commutation. Returns
/// the replacement value, or a null SDValue if nothing changed.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad replaces N in place, so signal "combined" with N itself.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1515 
1516 /// Given a node, return its input chain if it has one, otherwise return a null
1517 /// sd operand.
1518 static SDValue getInputChainForNode(SDNode *N) {
1519   if (unsigned NumOps = N->getNumOperands()) {
1520     if (N->getOperand(0).getValueType() == MVT::Other)
1521       return N->getOperand(0);
1522     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1523       return N->getOperand(NumOps-1);
1524     for (unsigned i = 1; i < NumOps-1; ++i)
1525       if (N->getOperand(i).getValueType() == MVT::Other)
1526         return N->getOperand(i);
1527   }
1528   return SDValue();
1529 }
1530 
/// Simplify a TokenFactor node: drop redundant chain operands and flatten
/// single-use nested token factors into one node. Returns the replacement
/// value, or a null SDValue when nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru
        // A multi-use or already-visited inner token factor is kept as a
        // plain operand and handled by the default case below.

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Add users to worklist if AA is enabled, since it may introduce
    // a lot of new chained token factors while removing memory deps.
    bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
      : DAG.getSubtarget().useAA();
    return CombineTo(N, Result, UseAA /*add to worklist*/);
  }

  // Result is still a null SDValue here, signalling "no change".
  return Result;
}
1608 
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Forward each result value of N to the corresponding operand.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}
1625 
1626 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a
1627 /// ContantSDNode pointer else nullptr.
1628 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1629   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1630   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1631 }
1632 
/// Try to simplify an integer ADD node. Applies a sequence of algebraic
/// folds (constant folding, canonicalization of constants to the RHS,
/// add-of-sub rewrites, add->or when operands share no bits, and
/// sign-extension-based rewrites). Returns the replacement value, or a null
/// SDValue when no fold applies. The order of the folds below is
/// significant: earlier, simpler folds take priority.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());
  }
  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold ((c1-A)+c2) -> (c1+c2)-A
  // Opaque constants are excluded via getAsNonOpaqueConstant.
  if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
    if (N0.getOpcode() == ISD::SUB)
      if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
        SDLoc DL(N);
        return DAG.getNode(ISD::SUB, DL, VT,
                           DAG.getConstant(N1C->getAPIntValue()+
                                           N0C->getAPIntValue(), DL, VT),
                           N0.getOperand(1));
      }
  }
  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // Simplify using demanded-bits analysis; on success the node was already
  // replaced in place, so just return it.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::OR, VT)) &&
      VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));
  // Same fold with the shl on the LHS instead.
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N0.getOperand(0).getOperand(1),
                                   N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-zeros or all-ones.
    if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
1779 
1780 SDValue DAGCombiner::visitADDC(SDNode *N) {
1781   SDValue N0 = N->getOperand(0);
1782   SDValue N1 = N->getOperand(1);
1783   EVT VT = N0.getValueType();
1784 
1785   // If the flag result is dead, turn this into an ADD.
1786   if (!N->hasAnyUseOfValue(1))
1787     return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
1788                      DAG.getNode(ISD::CARRY_FALSE,
1789                                  SDLoc(N), MVT::Glue));
1790 
1791   // canonicalize constant to RHS.
1792   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1793   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1794   if (N0C && !N1C)
1795     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
1796 
1797   // fold (addc x, 0) -> x + no carry out
1798   if (isNullConstant(N1))
1799     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1800                                         SDLoc(N), MVT::Glue));
1801 
1802   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1803   APInt LHSZero, LHSOne;
1804   APInt RHSZero, RHSOne;
1805   DAG.computeKnownBits(N0, LHSZero, LHSOne);
1806 
1807   if (LHSZero.getBoolValue()) {
1808     DAG.computeKnownBits(N1, RHSZero, RHSOne);
1809 
1810     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1811     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1812     if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1813       return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
1814                        DAG.getNode(ISD::CARRY_FALSE,
1815                                    SDLoc(N), MVT::Glue));
1816   }
1817 
1818   return SDValue();
1819 }
1820 
1821 SDValue DAGCombiner::visitADDE(SDNode *N) {
1822   SDValue N0 = N->getOperand(0);
1823   SDValue N1 = N->getOperand(1);
1824   SDValue CarryIn = N->getOperand(2);
1825 
1826   // canonicalize constant to RHS
1827   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1828   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1829   if (N0C && !N1C)
1830     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1831                        N1, N0, CarryIn);
1832 
1833   // fold (adde x, y, false) -> (addc x, y)
1834   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1835     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1836 
1837   return SDValue();
1838 }
1839 
1840 // Since it may not be valid to emit a fold to zero for vector initializers
1841 // check if we can before folding.
1842 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
1843                              SelectionDAG &DAG,
1844                              bool LegalOperations, bool LegalTypes) {
1845   if (!VT.isVector())
1846     return DAG.getConstant(0, DL, VT);
1847   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1848     return DAG.getConstant(0, DL, VT);
1849   return SDValue();
1850 }
1851 
/// Try to simplify an integer SUB node. Applies a sequence of algebraic
/// folds (x-x, constant folding, sub->add-of-negated-constant, nested
/// add/sub cancellation, global-address offset folding, and a
/// sign-extend-in-reg rewrite). Returns the replacement value, or a null
/// SDValue when no fold applies. The fold order below is significant.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());
  }
  // Opaque constants are deliberately excluded from the constant folds below.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstant(N0))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDLoc DL(N);
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 SDLoc(N), VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
1961 
1962 SDValue DAGCombiner::visitSUBC(SDNode *N) {
1963   SDValue N0 = N->getOperand(0);
1964   SDValue N1 = N->getOperand(1);
1965   EVT VT = N0.getValueType();
1966   SDLoc DL(N);
1967 
1968   // If the flag result is dead, turn this into an SUB.
1969   if (!N->hasAnyUseOfValue(1))
1970     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
1971                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1972 
1973   // fold (subc x, x) -> 0 + no borrow
1974   if (N0 == N1)
1975     return CombineTo(N, DAG.getConstant(0, DL, VT),
1976                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1977 
1978   // fold (subc x, 0) -> x + no borrow
1979   if (isNullConstant(N1))
1980     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1981 
1982   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
1983   if (isAllOnesConstant(N0))
1984     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
1985                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1986 
1987   return SDValue();
1988 }
1989 
1990 SDValue DAGCombiner::visitSUBE(SDNode *N) {
1991   SDValue N0 = N->getOperand(0);
1992   SDValue N1 = N->getOperand(1);
1993   SDValue CarryIn = N->getOperand(2);
1994 
1995   // fold (sube x, y, false) -> (subc x, y)
1996   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1997     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
1998 
1999   return SDValue();
2000 }
2001 
/// Try to simplify an integer MUL node. Handles constant folding,
/// canonicalization, strength reduction of multiplies by powers of two into
/// shifts, shift/mul reassociation, and distribution over ADD. Returns the
/// replacement value, or a null SDValue when no fold applies.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, a "constant" operand means a splat of one value.
    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Val receives the splat value when matching a constant-splat shift amount.
  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
                     isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
                       isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}
2136 
2137 /// Return true if divmod libcall is available.
2138 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2139                                      const TargetLowering &TLI) {
2140   RTLIB::Libcall LC;
2141   EVT NodeType = Node->getValueType(0);
2142   if (!NodeType.isSimple())
2143     return false;
2144   switch (NodeType.getSimpleVT().SimpleTy) {
2145   default: return false; // No libcall for vector types.
2146   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2147   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2148   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2149   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2150   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2151   }
2152 
2153   return TLI.getLibcallName(LC) != nullptr;
2154 }
2155 
/// Issue divrem if both quotient and remainder are needed.
/// Scans the other uses of Node's first operand for sibling div/rem nodes
/// with the same operands and rewrites all of them to share one DIVREM node.
/// Returns the combined DIVREM value, or a null SDValue when no combine was
/// performed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk the use list of the dividend looking for matching div/rem/divrem
  // nodes that share both operands with Node.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Lazily create (or reuse) the shared DIVREM node on the first match.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Result 0 of DIVREM is the quotient; result 1 is the remainder.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2225 
/// Combine an ISD::SDIV node: constant folding, strength reduction to UDIV
/// or shift sequences for power-of-two divisors, and target/divrem lowering.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // For a negative divisor -2^k, the negated value is the power of two and
    // has the same trailing-zero count, so lg2 is correct for both signs.
    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  // undef / X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
2324 
/// Combine an ISD::UDIV node: constant folding, power-of-two divisors to
/// shifts, and target/divrem lowering.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  DL, ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
      }
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  // undef / X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
2388 
// Handles both ISD::SREM and ISD::UREM; the opcode selects signed vs unsigned
// folds. Note visitSDIV/visitUDIV deliberately avoid forming DIVREM for
// constant divisors when division is expensive so the speculative-DIV
// simplification below stays reachable.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (!VT.isVector()) {
      if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
        return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
    }
  } else {
    // fold (urem x, pow2) -> (and x, pow2-1)
    if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
        N1C->getAPIntValue().isPowerOf2()) {
      return DAG.getNode(ISD::AND, DL, VT, N0,
                         DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
    }
    // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    if (N1.getOpcode() == ISD::SHL) {
      if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
        if (SHC->getAPIntValue().isPowerOf2()) {
          // (shl pow2, y) has a single bit set, so adding -1 produces the
          // mask (shl pow2, y) - 1.
          SDValue Add =
            DAG.getNode(ISD::ADD, DL, VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
                                 VT));
          AddToWorklist(Add.getNode());
          return DAG.getNode(ISD::AND, DL, VT, N0, Add);
        }
      }
    }
  }

  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    // Speculatively combine the DIV; only proceed if it simplified to a
    // different node (otherwise X-X/C*C is strictly worse than the REM).
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem  (or udiv, urem -> udivrem)
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X % undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
2472 
2473 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2474   SDValue N0 = N->getOperand(0);
2475   SDValue N1 = N->getOperand(1);
2476   EVT VT = N->getValueType(0);
2477   SDLoc DL(N);
2478 
2479   // fold (mulhs x, 0) -> 0
2480   if (isNullConstant(N1))
2481     return N1;
2482   // fold (mulhs x, 1) -> (sra x, size(x)-1)
2483   if (isOneConstant(N1)) {
2484     SDLoc DL(N);
2485     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2486                        DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
2487                                        DL,
2488                                        getShiftAmountTy(N0.getValueType())));
2489   }
2490   // fold (mulhs x, undef) -> 0
2491   if (N0.isUndef() || N1.isUndef())
2492     return DAG.getConstant(0, SDLoc(N), VT);
2493 
2494   // If the type twice as wide is legal, transform the mulhs to a wider multiply
2495   // plus a shift.
2496   if (VT.isSimple() && !VT.isVector()) {
2497     MVT Simple = VT.getSimpleVT();
2498     unsigned SimpleSize = Simple.getSizeInBits();
2499     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2500     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2501       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2502       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2503       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2504       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2505             DAG.getConstant(SimpleSize, DL,
2506                             getShiftAmountTy(N1.getValueType())));
2507       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2508     }
2509   }
2510 
2511   return SDValue();
2512 }
2513 
2514 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2515   SDValue N0 = N->getOperand(0);
2516   SDValue N1 = N->getOperand(1);
2517   EVT VT = N->getValueType(0);
2518   SDLoc DL(N);
2519 
2520   // fold (mulhu x, 0) -> 0
2521   if (isNullConstant(N1))
2522     return N1;
2523   // fold (mulhu x, 1) -> 0
2524   if (isOneConstant(N1))
2525     return DAG.getConstant(0, DL, N0.getValueType());
2526   // fold (mulhu x, undef) -> 0
2527   if (N0.isUndef() || N1.isUndef())
2528     return DAG.getConstant(0, DL, VT);
2529 
2530   // If the type twice as wide is legal, transform the mulhu to a wider multiply
2531   // plus a shift.
2532   if (VT.isSimple() && !VT.isVector()) {
2533     MVT Simple = VT.getSimpleVT();
2534     unsigned SimpleSize = Simple.getSizeInBits();
2535     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2536     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2537       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2538       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2539       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2540       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2541             DAG.getConstant(SimpleSize, DL,
2542                             getShiftAmountTy(N1.getValueType())));
2543       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2544     }
2545   }
2546 
2547   return SDValue();
2548 }
2549 
/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Returns the replacement value if a simplification was made, otherwise a
/// null SDValue.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): this arm uses isOperationLegal while the low-half arm above
  // uses isOperationLegalOrCustom — presumably intentional, but asymmetric;
  // confirm before unifying.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    // Speculatively combine; only use the result if it actually simplified
    // to a different (and legal) node.
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
2600 
/// Combine an ISD::SMUL_LOHI node, which produces both the low and high
/// halves of a signed multiply.
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhs to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
2631 
/// Combine an ISD::UMUL_LOHI node, which produces both the low and high
/// halves of an unsigned multiply.
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhu to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1).
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0).
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
2662 
2663 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2664   // (smulo x, 2) -> (saddo x, x)
2665   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2666     if (C2->getAPIntValue() == 2)
2667       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2668                          N->getOperand(0), N->getOperand(0));
2669 
2670   return SDValue();
2671 }
2672 
2673 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2674   // (umulo x, 2) -> (uaddo x, x)
2675   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2676     if (C2->getAPIntValue() == 2)
2677       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2678                          N->getOperand(0), N->getOperand(0));
2679 
2680   return SDValue();
2681 }
2682 
/// Combine the integer min/max nodes (ISD::SMIN/SMAX/UMIN/UMAX).
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (minmax c1, c2) -> folded constant
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}
2706 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it by hoisting the common inner operation (extend/bswap/truncate,
/// shared shift amount, bitcast, or identical shuffle mask) outside the
/// logical op. Returns the replacement value or a null SDValue.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Perform the logical op in the narrower (pre-extend) type, then re-apply
    // the unary op on top.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level == AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    &SVN0->getMask()[0]);
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    &SVN0->getMask()[0]);
      }
    }
  }

  return SDValue();
}
2859 
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
///
/// \param N0, N1 the two values being ANDed.
/// \param LocReference node whose debug location is used for new nodes.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
                                  SDNode *LocReference) {
  EVT VT = N1.getValueType();

  // fold (and x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(LocReference), VT);
  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
      if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      if (isAllOnesConstant(LR)) {
        // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
        if (Op1 == ISD::SETEQ) {
          SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
                                        LR.getValueType(), LL, RL);
          AddToWorklist(ANDNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
        }
        // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
        if (Op1 == ISD::SETGT) {
          SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                       LR.getValueType(), LL, RL);
          AddToWorklist(ORNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
        }
      }
    }
    // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
    if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
        Op0 == Op1 && LL.getValueType().isInteger() &&
      Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
                            (isAllOnesConstant(LR) && isNullConstant(RR)))) {
      SDLoc DL(N0);
      SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
                                    LL, DAG.getConstant(1, DL,
                                                        LL.getValueType()));
      AddToWorklist(ADDNode.getNode());
      return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
                          DAG.getConstant(2, DL, LL.getValueType()),
                          ISD::SETUGE);
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // If both setccs compare the same operands, try to merge the two
    // condition codes into one combined comparison.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
        EVT CCVT = getSetCCResultType(LL.getValueType());
        if (N0.getValueType() == CCVT ||
            (!LegalOperations && N0.getValueType() == MVT::i1))
          return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                              LL, LR, Result);
      }
    }
  }

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      APInt ADDC = ADDI->getAPIntValue();
      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(LocReference, 0);
            }
          }
        }
      }
    }
  }

  return SDValue();
}
2969 
/// Check whether (and (load x), mask) can be matched as a zero-extending
/// load. Returns true if it can; on success \p ExtVT is set to the type of
/// the value effectively loaded, \p LoadedVT to the load's current memory
/// type, and \p NarrowLoad to whether the load must be narrowed to ExtVT
/// (false when the existing load width already matches the mask).
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                                   bool &NarrowLoad) {
  uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();

  // The AND constant must be a low-bit mask (2^n - 1) for this to be a zext.
  if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
    return false;

  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  LoadedVT = LoadN->getMemoryVT();

  if (ExtVT == LoadedVT &&
      (!LegalOperations ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    // ZEXTLOAD will match without needing to change the size of the value being
    // loaded.
    NarrowLoad = false;
    return true;
  }

  // Do not change the width of a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  if (LegalOperations &&
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    return false;

  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    return false;

  NarrowLoad = true;
  return true;
}
3009 
/// Try to simplify an ISD::AND node.  Handles constant folding, vector
/// special cases, folding the AND into loads (converting ext/any-ext loads
/// to zext loads, possibly narrowing them), and finally the shared
/// AND/OR-of-setcc logic in visitANDLike.  Returns the replacement value,
/// SDValue(N, 0) when uses were rewritten in place via CombineTo, or an
/// empty SDValue when no fold applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getNullValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getNullValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getOperand(0).getOpcode() == ISD::LOAD) ||
      N0.getOpcode() == ISD::LOAD) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // A 1-bit zero is used as a sentinel: it can never pass the
    // isAllOnesValue check below, so the fold fires only when a usable
    // constant mask is actually found.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getVectorElementType().getSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant =
      Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());

    // B records whether the AND is redundant for this kind of load:
    // ZEXTLOAD/NON_EXTLOAD already guarantee the masked-off bits are zero,
    // while an EXTLOAD qualifies only when converting it to ZEXTLOAD is
    // profitable (checked above).
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);
      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      // Fold the AND away, taking care not to fold to the old load node if we
      // replaced it.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (N1C && (N0.getOpcode() == ISD::LOAD ||
              (N0.getOpcode() == ISD::ANY_EXTEND &&
               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
      auto NarrowLoad = false;
      EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
      EVT ExtVT, LoadedVT;
      if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
                           NarrowLoad)) {
        if (!NarrowLoad) {
          // Same memory width: just retag the load as a ZEXTLOAD.
          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
                           LN0->getMemOperand());
          AddToWorklist(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        } else {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes.  For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
          if (DAG.getDataLayout().isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            SDLoc DL(LN0);
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorklist(NewPtr.getNode());

          SDValue Load =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), NewPtr,
                           LN0->getPointerInfo(),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                           LN0->isInvariant(), Alignment, LN0->getAAInfo());
          AddToWorklist(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  return SDValue();
}
3312 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// N is the OR node being examined; N0/N1 are its operands.  When
/// DemandHighBits is true the caller needs the bits above the low halfword
/// to be correct (zero), so the masking ANDs must be present or the high
/// bits must be provably zero.  Returns the replacement value or an empty
/// SDValue if the pattern does not match.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize the operands: an (and (srl ...)) term belongs in N1 and an
  // (and (shl ...)) term belongs in N0, matching the mask checks below.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  // Strip a masking AND from the SHL side; it must keep exactly 0xFF00.
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  // Strip a masking AND from the SRL side; it must keep exactly 0xFF.
  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After stripping the ANDs, we need (shl a, 8) in N0 and (srl a, 8) in N1.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifts must originate from the same value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  // bswap puts the two interesting bytes at the top of the value; shift
  // them back down into the low halfword for types wider than i16.
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
3420 
3421 /// Return true if the specified node is an element that makes up a 32-bit
3422 /// packed halfword byteswap.
3423 /// ((x & 0x000000ff) << 8) |
3424 /// ((x & 0x0000ff00) >> 8) |
3425 /// ((x & 0x00ff0000) << 8) |
3426 /// ((x & 0xff000000) >> 8)
3427 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3428   if (!N.getNode()->hasOneUse())
3429     return false;
3430 
3431   unsigned Opc = N.getOpcode();
3432   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3433     return false;
3434 
3435   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3436   if (!N1C)
3437     return false;
3438 
3439   unsigned Num;
3440   switch (N1C->getZExtValue()) {
3441   default:
3442     return false;
3443   case 0xFF:       Num = 0; break;
3444   case 0xFF00:     Num = 1; break;
3445   case 0xFF0000:   Num = 2; break;
3446   case 0xFF000000: Num = 3; break;
3447   }
3448 
3449   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3450   SDValue N0 = N.getOperand(0);
3451   if (Opc == ISD::AND) {
3452     if (Num == 0 || Num == 2) {
3453       // (x >> 8) & 0xff
3454       // (x >> 8) & 0xff0000
3455       if (N0.getOpcode() != ISD::SRL)
3456         return false;
3457       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3458       if (!C || C->getZExtValue() != 8)
3459         return false;
3460     } else {
3461       // (x << 8) & 0xff00
3462       // (x << 8) & 0xff000000
3463       if (N0.getOpcode() != ISD::SHL)
3464         return false;
3465       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3466       if (!C || C->getZExtValue() != 8)
3467         return false;
3468     }
3469   } else if (Opc == ISD::SHL) {
3470     // (x & 0xff) << 8
3471     // (x & 0xff0000) << 8
3472     if (Num != 0 && Num != 2)
3473       return false;
3474     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3475     if (!C || C->getZExtValue() != 8)
3476       return false;
3477   } else { // Opc == ISD::SRL
3478     // (x & 0xff00) >> 8
3479     // (x & 0xff000000) >> 8
3480     if (Num != 1 && Num != 3)
3481       return false;
3482     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3483     if (!C || C->getZExtValue() != 8)
3484       return false;
3485   }
3486 
3487   if (Parts[Num])
3488     return false;
3489 
3490   Parts[Num] = N0.getOperand(0).getNode();
3491   return true;
3492 }
3493 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
/// N is the outermost OR; N0/N1 are its operands.  Returns the rotated
/// bswap (or the SHL/SRL expansion when no rotate is legal), or an empty
/// SDValue if the four-term pattern is not found.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] receives the node feeding byte lane i; isBSwapHWordElement
  // fills it in and rejects duplicates.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
3570 
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
/// LocReference supplies the debug location (and the node whose type is
/// used) for any nodes created; N0/N1 are the OR's operands.
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N1.getValueType();
  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.isUndef() || N1.isUndef())) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
                           SDLoc(LocReference), VT);
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    // Both setccs compare against the same RHS with the same condition.
    if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
      if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                      LR.getValueType(), LL, RL);
        AddToWorklist(ANDNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // Same operands on both sides: merge the two condition codes into one
    // setcc when the combined condition is representable and legal.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
        EVT CCVT = getSetCCResultType(LL.getValueType());
        if (N0.getValueType() == CCVT ||
            (!LegalOperations && N0.getValueType() == MVT::i1))
          return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                              LL, LR, Result);
      }
    }
  }

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero.  Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero.  Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          SDLoc DL(LocReference);
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
  }

  return SDValue();
}
3667 
/// Visit an ISD::OR node: constant-fold, canonicalize constants to the RHS,
/// and try the OR-specific combines (merging zero-filled shuffles, halfword
/// bswap recognition, rotate recognition, and mask canonicalization).
/// Returns the replacement value, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT) &&
        N0->getOperand(1) == N1->getOperand(1) &&
        // Both shuffles must share the same all-zeros second operand (V_0),
        // so OR-ing the two results acts as a lane-wise select of A and B.
        ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
      bool CanFold = true;
      unsigned NumElts = VT.getVectorNumElements();
      const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
      const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
      // We construct two shuffle masks:
      // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
      // and N1 as the second operand.
      // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
      // and N0 as the second operand.
      // We do this because OR is commutable and therefore there might be
      // two ways to fold this node into a shuffle.
      SmallVector<int,4> Mask1;
      SmallVector<int,4> Mask2;

      for (unsigned i = 0; i != NumElts && CanFold; ++i) {
        int M0 = SV0->getMaskElt(i);
        int M1 = SV1->getMaskElt(i);

        // Both shuffle indexes are undef. Propagate Undef.
        if (M0 < 0 && M1 < 0) {
          Mask1.push_back(M0);
          Mask2.push_back(M0);
          continue;
        }

        // Give up if exactly one lane is undef, or if both lanes select
        // from the same side (both from A/B, or both from the zero vector):
        // in those cases the OR of the two lanes is not a single element
        // of A or B.
        if (M0 < 0 || M1 < 0 ||
            (M0 < (int)NumElts && M1 < (int)NumElts) ||
            (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
          CanFold = false;
          break;
        }

        // Exactly one of the two lanes reads a real element; the other
        // reads zero, so the OR picks the nonzero side.  Mask indices
        // >= NumElts address the second operand of the new shuffle.
        Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
        Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
      }

      if (CanFold) {
        // Fold this sequence only if the resulting shuffle is 'legal'.
        if (TLI.isShuffleMaskLegal(Mask1, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
                                      N1->getOperand(0), &Mask1[0]);
        if (TLI.isShuffleMaskLegal(Mask2, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
                                      N0->getOperand(0), &Mask2[0]);
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.  (The rewrite is a bitwise identity for any
  // constants; the guard below only performs it when the masks overlap.)
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
             isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
      if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                   N1C, C1))
        return DAG.getNode(
            ISD::AND, SDLoc(N), VT,
            DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
      // Constant folding failed (e.g. opaque constants): stop here rather
      // than fall through to the remaining combines.
      return SDValue();
    }
  }
  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
3816 
3817 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
3818 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3819   if (Op.getOpcode() == ISD::AND) {
3820     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
3821       Mask = Op.getOperand(1);
3822       Op = Op.getOperand(0);
3823     } else {
3824       return false;
3825     }
3826   }
3827 
3828   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
3829     Shift = Op;
3830     return true;
3831   }
3832 
3833   return false;
3834 }
3835 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  //
  // MaskLoBits != 0 records that we are in case [A]; its value is the
  // number of low bits kept by Mask.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);
  // From here on, Neg == NegC - NegOp1.

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  // No mask (case [B]): require exact equality.
  return Width == EltSize;
}
3938 
3939 // A subroutine of MatchRotate used once we have found an OR of two opposite
3940 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
3941 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
3942 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
3943 // Neg with outer conversions stripped away.
3944 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
3945                                        SDValue Neg, SDValue InnerPos,
3946                                        SDValue InnerNeg, unsigned PosOpcode,
3947                                        unsigned NegOpcode, SDLoc DL) {
3948   // fold (or (shl x, (*ext y)),
3949   //          (srl x, (*ext (sub 32, y)))) ->
3950   //   (rotl x, y) or (rotr x, (sub 32, y))
3951   //
3952   // fold (or (shl x, (*ext (sub 32, y))),
3953   //          (srl x, (*ext y))) ->
3954   //   (rotr x, y) or (rotl x, (sub 32, y))
3955   EVT VT = Shifted.getValueType();
3956   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
3957     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
3958     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
3959                        HasPos ? Pos : Neg).getNode();
3960   }
3961 
3962   return nullptr;
3963 }
3964 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the new node, or null if no rotate was matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
    uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
    // Only a rotate when the two shift amounts cover the element exactly.
    if ((LShVal + RShVal) != EltSizeInBits)
      return nullptr;

    // The rotl amount is the shl amount; the rotr amount is the srl amount.
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
      SDValue Mask = DAG.getConstant(AllBits, DL, VT);

      if (LHSMask.getNode()) {
        // The shl half contributed zeros in its low LShVal bits, so LHSMask
        // cannot constrain those bits of the rotate — force them on.
        APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask,
                                       DAG.getConstant(RHSBits, DL, VT)));
      }
      if (RHSMask.getNode()) {
        // Symmetrically, the srl half contributed zeros in its high RShVal
        // bits, so RHSMask cannot constrain those bits.
        APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask,
                                       DAG.getConstant(LHSBits, DL, VT)));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both orientations: the shl amount as the "positive" rotl amount,
  // then the srl amount as the "positive" rotr amount.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
4075 
/// Visit an ISD::XOR node: constant-fold, canonicalize constants to the RHS,
/// and apply the XOR-specific combines (setcc inversion, De Morgan
/// expansions of "not", constant merging, and the shl-1 -> rotl trick).
/// Returns the replacement value, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  // N1C is reused below by the (xor (xor x, c1), c2) constant-merging fold.
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // XOR against the target's "true" value inverts a comparison result, so
  // flip the condition code instead of emitting the xor.
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // De Morgan for i1 values, where (xor x, 1) is logical not.
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // Same De Morgan rewrite for a full-width "not" (xor with -1).
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
4238 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// Tries to rewrite (shift (binop X, C1), C2) into
/// (binop (shift X, C2), (shift C1, C2)), i.e. pulls the binop through the
/// shift.  Returns the new binop, or an empty SDValue if no transform is
/// performed.  NOTE(review): the \p Amt parameter is not referenced in this
/// body; the shift amount is taken from N->getOperand(1).
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  // Don't duplicate the binop for other users.
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the commute.
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  // Both operands are constants here, so this must constant-fold.
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
4312 
4313 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
4314   assert(N->getOpcode() == ISD::TRUNCATE);
4315   assert(N->getOperand(0).getOpcode() == ISD::AND);
4316 
4317   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
4318   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
4319     SDValue N01 = N->getOperand(0).getOperand(1);
4320 
4321     if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
4322       if (!N01C->isOpaque()) {
4323         EVT TruncVT = N->getValueType(0);
4324         SDValue N00 = N->getOperand(0).getOperand(0);
4325         APInt TruncC = N01C->getAPIntValue();
4326         TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
4327         SDLoc DL(N);
4328 
4329         return DAG.getNode(ISD::AND, DL, TruncVT,
4330                            DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
4331                            DAG.getConstant(TruncC, DL, TruncVT));
4332       }
4333     }
4334   }
4335 
4336   return SDValue();
4337 }
4338 
4339 SDValue DAGCombiner::visitRotate(SDNode *N) {
4340   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
4341   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
4342       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
4343     if (SDValue NewOp1 =
4344             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
4345       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
4346                          N->getOperand(0), NewOp1);
4347   }
4348   return SDValue();
4349 }
4350 
4351 SDValue DAGCombiner::visitSHL(SDNode *N) {
4352   SDValue N0 = N->getOperand(0);
4353   SDValue N1 = N->getOperand(1);
4354   EVT VT = N0.getValueType();
4355   unsigned OpSizeInBits = VT.getScalarSizeInBits();
4356 
4357   // fold vector ops
4358   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4359   if (VT.isVector()) {
4360     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4361       return FoldedVOp;
4362 
4363     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
4364     // If setcc produces all-one true value then:
4365     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
4366     if (N1CV && N1CV->isConstant()) {
4367       if (N0.getOpcode() == ISD::AND) {
4368         SDValue N00 = N0->getOperand(0);
4369         SDValue N01 = N0->getOperand(1);
4370         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
4371 
4372         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
4373             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
4374                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4375           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
4376                                                      N01CV, N1CV))
4377             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
4378         }
4379       } else {
4380         N1C = isConstOrConstSplat(N1);
4381       }
4382     }
4383   }
4384 
4385   // fold (shl c1, c2) -> c1<<c2
4386   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4387   if (N0C && N1C && !N1C->isOpaque())
4388     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
4389   // fold (shl 0, x) -> 0
4390   if (isNullConstant(N0))
4391     return N0;
4392   // fold (shl x, c >= size(x)) -> undef
4393   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4394     return DAG.getUNDEF(VT);
4395   // fold (shl x, 0) -> x
4396   if (N1C && N1C->isNullValue())
4397     return N0;
4398   // fold (shl undef, x) -> 0
4399   if (N0.isUndef())
4400     return DAG.getConstant(0, SDLoc(N), VT);
4401   // if (shl x, c) is known to be zero, return 0
4402   if (DAG.MaskedValueIsZero(SDValue(N, 0),
4403                             APInt::getAllOnesValue(OpSizeInBits)))
4404     return DAG.getConstant(0, SDLoc(N), VT);
4405   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
4406   if (N1.getOpcode() == ISD::TRUNCATE &&
4407       N1.getOperand(0).getOpcode() == ISD::AND) {
4408     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
4409       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
4410   }
4411 
4412   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4413     return SDValue(N, 0);
4414 
4415   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
4416   if (N1C && N0.getOpcode() == ISD::SHL) {
4417     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4418       uint64_t c1 = N0C1->getZExtValue();
4419       uint64_t c2 = N1C->getZExtValue();
4420       SDLoc DL(N);
4421       if (c1 + c2 >= OpSizeInBits)
4422         return DAG.getConstant(0, DL, VT);
4423       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4424                          DAG.getConstant(c1 + c2, DL, N1.getValueType()));
4425     }
4426   }
4427 
4428   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
4429   // For this to be valid, the second form must not preserve any of the bits
4430   // that are shifted out by the inner shift in the first form.  This means
4431   // the outer shift size must be >= the number of bits added by the ext.
4432   // As a corollary, we don't care what kind of ext it is.
4433   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
4434               N0.getOpcode() == ISD::ANY_EXTEND ||
4435               N0.getOpcode() == ISD::SIGN_EXTEND) &&
4436       N0.getOperand(0).getOpcode() == ISD::SHL) {
4437     SDValue N0Op0 = N0.getOperand(0);
4438     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4439       uint64_t c1 = N0Op0C1->getZExtValue();
4440       uint64_t c2 = N1C->getZExtValue();
4441       EVT InnerShiftVT = N0Op0.getValueType();
4442       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
4443       if (c2 >= OpSizeInBits - InnerShiftSize) {
4444         SDLoc DL(N0);
4445         if (c1 + c2 >= OpSizeInBits)
4446           return DAG.getConstant(0, DL, VT);
4447         return DAG.getNode(ISD::SHL, DL, VT,
4448                            DAG.getNode(N0.getOpcode(), DL, VT,
4449                                        N0Op0->getOperand(0)),
4450                            DAG.getConstant(c1 + c2, DL, N1.getValueType()));
4451       }
4452     }
4453   }
4454 
4455   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
4456   // Only fold this if the inner zext has no other uses to avoid increasing
4457   // the total number of instructions.
4458   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
4459       N0.getOperand(0).getOpcode() == ISD::SRL) {
4460     SDValue N0Op0 = N0.getOperand(0);
4461     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4462       uint64_t c1 = N0Op0C1->getZExtValue();
4463       if (c1 < VT.getScalarSizeInBits()) {
4464         uint64_t c2 = N1C->getZExtValue();
4465         if (c1 == c2) {
4466           SDValue NewOp0 = N0.getOperand(0);
4467           EVT CountVT = NewOp0.getOperand(1).getValueType();
4468           SDLoc DL(N);
4469           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
4470                                        NewOp0,
4471                                        DAG.getConstant(c2, DL, CountVT));
4472           AddToWorklist(NewSHL.getNode());
4473           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
4474         }
4475       }
4476     }
4477   }
4478 
4479   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
4480   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
4481   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
4482       cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
4483     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4484       uint64_t C1 = N0C1->getZExtValue();
4485       uint64_t C2 = N1C->getZExtValue();
4486       SDLoc DL(N);
4487       if (C1 <= C2)
4488         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4489                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
4490       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
4491                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
4492     }
4493   }
4494 
4495   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
4496   //                               (and (srl x, (sub c1, c2), MASK)
4497   // Only fold this if the inner shift has no other uses -- if it does, folding
4498   // this will increase the total number of instructions.
4499   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4500     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4501       uint64_t c1 = N0C1->getZExtValue();
4502       if (c1 < OpSizeInBits) {
4503         uint64_t c2 = N1C->getZExtValue();
4504         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
4505         SDValue Shift;
4506         if (c2 > c1) {
4507           Mask = Mask.shl(c2 - c1);
4508           SDLoc DL(N);
4509           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4510                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
4511         } else {
4512           Mask = Mask.lshr(c1 - c2);
4513           SDLoc DL(N);
4514           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4515                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
4516         }
4517         SDLoc DL(N0);
4518         return DAG.getNode(ISD::AND, DL, VT, Shift,
4519                            DAG.getConstant(Mask, DL, VT));
4520       }
4521     }
4522   }
4523   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
4524   if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
4525     unsigned BitSize = VT.getScalarSizeInBits();
4526     SDLoc DL(N);
4527     SDValue HiBitsMask =
4528       DAG.getConstant(APInt::getHighBitsSet(BitSize,
4529                                             BitSize - N1C->getZExtValue()),
4530                       DL, VT);
4531     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
4532                        HiBitsMask);
4533   }
4534 
4535   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
4536   // Variant of version done on multiply, except mul by a power of 2 is turned
4537   // into a shift.
4538   APInt Val;
4539   if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
4540       (isa<ConstantSDNode>(N0.getOperand(1)) ||
4541        isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
4542     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
4543     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
4544     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
4545   }
4546 
4547   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
4548   if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
4549     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4550       if (SDValue Folded =
4551               DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
4552         return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
4553     }
4554   }
4555 
4556   if (N1C && !N1C->isOpaque())
4557     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
4558       return NewSHL;
4559 
4560   return SDValue();
4561 }
4562 
/// Visit an ISD::SRA (arithmetic shift right) node and attempt the standard
/// shift simplifications.  N0 is the value being shifted, N1 the shift
/// amount.  Returns the replacement value, or an empty SDValue if no fold
/// applies.  The folds are tried in order; earlier, cheaper folds win.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // Treat a constant-splat shift amount like a scalar constant so that the
    // constant-based folds below also fire for vectors.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> c1 >> c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (sra -1, x) -> -1
  if (isAllOnesConstant(N0))
    return N0;
  // fold (sra x, c) -> undef when c >= size(x): out-of-range shift amount.
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // Shifting left then arithmetically right by the same amount sign-extends
    // the low (size - c1) bits in place.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // Clamp to size-1: arithmetically shifting by >= the bit width is
      // equivalent to shifting by size-1 (every bit becomes the sign bit).
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      SDLoc DL(N);
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                         DAG.getConstant(Sum, DL, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // A logical shift suffices here: the truncate below discards the high
        // bits anyway, and the final sign_extend recreates the sign from the
        // truncated value's top bit.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      // Only valid when the inner shift amount exactly equals the number of
      // bits the truncate drops, so the truncate keeps precisely the shifted
      // range and the two shifts can be merged in the wide type.
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
4708 
/// Visit an ISD::SRL (logical shift right) node and attempt the standard
/// shift simplifications.  N0 is the value being shifted, N1 the shift
/// amount.  Returns the replacement value, or an empty SDValue if no fold
/// applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // Treat a constant-splat shift amount like a scalar constant so that the
    // constant-based folds below also fire for vectors.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      SDLoc DL(N);
      // A combined out-of-range shift clears every bit.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, DL, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift:
    // the truncate must keep exactly the bits the inner shift brought down.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      SDLoc DL(N0);
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::TRUNCATE, DL, VT,
                         DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, DL,
                                                     ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    // Restricted to <= 64 bits so the mask fits in a uint64_t below.
    if (BitSize <= 64) {
      // ~0ULL >> (c + 64 - BitSize) yields a 64-bit constant whose low
      // (BitSize - c) bits are set -- exactly the bits that survive
      // shifting left then logically right by c.
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, DL, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // The AND mask clears the bits the original wide srl would have
      // shifted in as zero, which the any_extend leaves undefined.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        // Bring the single possibly-set bit down to bit 0 first.
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
4909 
4910 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
4911   SDValue N0 = N->getOperand(0);
4912   EVT VT = N->getValueType(0);
4913 
4914   // fold (bswap c1) -> c2
4915   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
4916     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
4917   // fold (bswap (bswap x)) -> x
4918   if (N0.getOpcode() == ISD::BSWAP)
4919     return N0->getOperand(0);
4920   return SDValue();
4921 }
4922 
4923 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
4924   SDValue N0 = N->getOperand(0);
4925   EVT VT = N->getValueType(0);
4926 
4927   // fold (ctlz c1) -> c2
4928   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
4929     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
4930   return SDValue();
4931 }
4932 
4933 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
4934   SDValue N0 = N->getOperand(0);
4935   EVT VT = N->getValueType(0);
4936 
4937   // fold (ctlz_zero_undef c1) -> c2
4938   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
4939     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
4940   return SDValue();
4941 }
4942 
4943 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
4944   SDValue N0 = N->getOperand(0);
4945   EVT VT = N->getValueType(0);
4946 
4947   // fold (cttz c1) -> c2
4948   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
4949     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
4950   return SDValue();
4951 }
4952 
4953 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
4954   SDValue N0 = N->getOperand(0);
4955   EVT VT = N->getValueType(0);
4956 
4957   // fold (cttz_zero_undef c1) -> c2
4958   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
4959     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
4960   return SDValue();
4961 }
4962 
4963 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
4964   SDValue N0 = N->getOperand(0);
4965   EVT VT = N->getValueType(0);
4966 
4967   // fold (ctpop c1) -> c2
4968   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
4969     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
4970   return SDValue();
4971 }
4972 
4973 
4974 /// \brief Generate Min/Max node
4975 static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
4976                                    SDValue True, SDValue False,
4977                                    ISD::CondCode CC, const TargetLowering &TLI,
4978                                    SelectionDAG &DAG) {
4979   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
4980     return SDValue();
4981 
4982   switch (CC) {
4983   case ISD::SETOLT:
4984   case ISD::SETOLE:
4985   case ISD::SETLT:
4986   case ISD::SETLE:
4987   case ISD::SETULT:
4988   case ISD::SETULE: {
4989     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
4990     if (TLI.isOperationLegal(Opcode, VT))
4991       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
4992     return SDValue();
4993   }
4994   case ISD::SETOGT:
4995   case ISD::SETOGE:
4996   case ISD::SETGT:
4997   case ISD::SETGE:
4998   case ISD::SETUGT:
4999   case ISD::SETUGE: {
5000     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5001     if (TLI.isOperationLegal(Opcode, VT))
5002       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5003     return SDValue();
5004   }
5005   default:
5006     return SDValue();
5007   }
5008 }
5009 
/// Visit an ISD::SELECT node (N0 ? N1 : N2) and attempt the standard select
/// simplifications: constant conditions, i1 boolean algebra, select-of-select
/// normalization, and setcc-driven folds (min/max, select_cc).  Returns the
/// replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && isOneConstant(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      isNullConstant(N1) && isOneConstant(N2)) {
    SDValue XORNode;
    if (VT == VT0) {
      // Same type: the xor alone is the result.
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT0,
                         N0, DAG.getConstant(1, DL, VT0));
    }
    // Different types: xor in the condition's type, then extend or truncate
    // to the select's result type.
    SDLoc DL0(N0);
    XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
                          N0, DAG.getConstant(1, DL0, VT0));
    AddToWorklist(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && isNullConstant(N2))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence
      = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                        N1.getValueType(), Cond1, N1, N2);
      // use_empty() is false when the inner select already existed in the
      // DAG, in which case the transform is free.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                        N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
                                    N0, N1_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
                             N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1_1, N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
                                   N0, N2_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
                             N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1, N2_2);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
                                                N0.getOperand(1), N1, N2, CC,
                                                TLI, DAG))
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}
5193 
5194 static
5195 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
5196   SDLoc DL(N);
5197   EVT LoVT, HiVT;
5198   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
5199 
5200   // Split the inputs.
5201   SDValue Lo, Hi, LL, LH, RL, RH;
5202   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
5203   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
5204 
5205   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
5206   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
5207 
5208   return std::make_pair(Lo, Hi);
5209 }
5210 
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
//
// If each half of the condition BV is uniform (all non-undef elements
// identical), the vselect picks whole halves and can be rewritten as a
// CONCAT_VECTORS of the corresponding LHS/RHS halves, with no select at all.
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  // BottomHalf holds the first non-undef constant seen; every later non-undef
  // element must be the exact same node (constants are uniqued in the DAG, so
  // pointer identity is a value comparison here).
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  // An all-undef half would have been folded earlier (all-ones/all-zeros
  // build_vector checks in the caller), so both pointers must be set here.
  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  // A zero selector element picks the RHS half, nonzero picks the LHS half.
  return DAG.getNode(
      ISD::CONCAT_VECTORS, dl, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
5265 
5266 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
5267 
5268   if (Level >= AfterLegalizeTypes)
5269     return SDValue();
5270 
5271   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
5272   SDValue Mask = MSC->getMask();
5273   SDValue Data  = MSC->getValue();
5274   SDLoc DL(N);
5275 
5276   // If the MSCATTER data type requires splitting and the mask is provided by a
5277   // SETCC, then split both nodes and its operands before legalization. This
5278   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5279   // and enables future optimizations (e.g. min/max pattern matching on X86).
5280   if (Mask.getOpcode() != ISD::SETCC)
5281     return SDValue();
5282 
5283   // Check if any splitting is required.
5284   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5285       TargetLowering::TypeSplitVector)
5286     return SDValue();
5287   SDValue MaskLo, MaskHi, Lo, Hi;
5288   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5289 
5290   EVT LoVT, HiVT;
5291   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
5292 
5293   SDValue Chain = MSC->getChain();
5294 
5295   EVT MemoryVT = MSC->getMemoryVT();
5296   unsigned Alignment = MSC->getOriginalAlignment();
5297 
5298   EVT LoMemVT, HiMemVT;
5299   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5300 
5301   SDValue DataLo, DataHi;
5302   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5303 
5304   SDValue BasePtr = MSC->getBasePtr();
5305   SDValue IndexLo, IndexHi;
5306   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
5307 
5308   MachineMemOperand *MMO = DAG.getMachineFunction().
5309     getMachineMemOperand(MSC->getPointerInfo(),
5310                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5311                           Alignment, MSC->getAAInfo(), MSC->getRanges());
5312 
5313   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
5314   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
5315                             DL, OpsLo, MMO);
5316 
5317   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
5318   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
5319                             DL, OpsHi, MMO);
5320 
5321   AddToWorklist(Lo.getNode());
5322   AddToWorklist(Hi.getNode());
5323 
5324   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5325 }
5326 
5327 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
5328 
5329   if (Level >= AfterLegalizeTypes)
5330     return SDValue();
5331 
5332   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
5333   SDValue Mask = MST->getMask();
5334   SDValue Data  = MST->getValue();
5335   SDLoc DL(N);
5336 
5337   // If the MSTORE data type requires splitting and the mask is provided by a
5338   // SETCC, then split both nodes and its operands before legalization. This
5339   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5340   // and enables future optimizations (e.g. min/max pattern matching on X86).
5341   if (Mask.getOpcode() == ISD::SETCC) {
5342 
5343     // Check if any splitting is required.
5344     if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5345         TargetLowering::TypeSplitVector)
5346       return SDValue();
5347 
5348     SDValue MaskLo, MaskHi, Lo, Hi;
5349     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5350 
5351     EVT LoVT, HiVT;
5352     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
5353 
5354     SDValue Chain = MST->getChain();
5355     SDValue Ptr   = MST->getBasePtr();
5356 
5357     EVT MemoryVT = MST->getMemoryVT();
5358     unsigned Alignment = MST->getOriginalAlignment();
5359 
5360     // if Alignment is equal to the vector size,
5361     // take the half of it for the second part
5362     unsigned SecondHalfAlignment =
5363       (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
5364          Alignment/2 : Alignment;
5365 
5366     EVT LoMemVT, HiMemVT;
5367     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5368 
5369     SDValue DataLo, DataHi;
5370     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5371 
5372     MachineMemOperand *MMO = DAG.getMachineFunction().
5373       getMachineMemOperand(MST->getPointerInfo(),
5374                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5375                            Alignment, MST->getAAInfo(), MST->getRanges());
5376 
5377     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
5378                             MST->isTruncatingStore());
5379 
5380     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5381     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5382                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5383 
5384     MMO = DAG.getMachineFunction().
5385       getMachineMemOperand(MST->getPointerInfo(),
5386                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
5387                            SecondHalfAlignment, MST->getAAInfo(),
5388                            MST->getRanges());
5389 
5390     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
5391                             MST->isTruncatingStore());
5392 
5393     AddToWorklist(Lo.getNode());
5394     AddToWorklist(Hi.getNode());
5395 
5396     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5397   }
5398   return SDValue();
5399 }
5400 
5401 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
5402 
5403   if (Level >= AfterLegalizeTypes)
5404     return SDValue();
5405 
5406   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
5407   SDValue Mask = MGT->getMask();
5408   SDLoc DL(N);
5409 
5410   // If the MGATHER result requires splitting and the mask is provided by a
5411   // SETCC, then split both nodes and its operands before legalization. This
5412   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5413   // and enables future optimizations (e.g. min/max pattern matching on X86).
5414 
5415   if (Mask.getOpcode() != ISD::SETCC)
5416     return SDValue();
5417 
5418   EVT VT = N->getValueType(0);
5419 
5420   // Check if any splitting is required.
5421   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5422       TargetLowering::TypeSplitVector)
5423     return SDValue();
5424 
5425   SDValue MaskLo, MaskHi, Lo, Hi;
5426   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5427 
5428   SDValue Src0 = MGT->getValue();
5429   SDValue Src0Lo, Src0Hi;
5430   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5431 
5432   EVT LoVT, HiVT;
5433   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5434 
5435   SDValue Chain = MGT->getChain();
5436   EVT MemoryVT = MGT->getMemoryVT();
5437   unsigned Alignment = MGT->getOriginalAlignment();
5438 
5439   EVT LoMemVT, HiMemVT;
5440   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5441 
5442   SDValue BasePtr = MGT->getBasePtr();
5443   SDValue Index = MGT->getIndex();
5444   SDValue IndexLo, IndexHi;
5445   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
5446 
5447   MachineMemOperand *MMO = DAG.getMachineFunction().
5448     getMachineMemOperand(MGT->getPointerInfo(),
5449                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5450                           Alignment, MGT->getAAInfo(), MGT->getRanges());
5451 
5452   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
5453   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
5454                             MMO);
5455 
5456   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
5457   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
5458                             MMO);
5459 
5460   AddToWorklist(Lo.getNode());
5461   AddToWorklist(Hi.getNode());
5462 
5463   // Build a factor node to remember that this load is independent of the
5464   // other one.
5465   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5466                       Hi.getValue(1));
5467 
5468   // Legalized the chain result - switch anything that used the old chain to
5469   // use the new one.
5470   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
5471 
5472   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5473 
5474   SDValue RetOps[] = { GatherRes, Chain };
5475   return DAG.getMergeValues(RetOps, DL);
5476 }
5477 
5478 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
5479 
5480   if (Level >= AfterLegalizeTypes)
5481     return SDValue();
5482 
5483   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
5484   SDValue Mask = MLD->getMask();
5485   SDLoc DL(N);
5486 
5487   // If the MLOAD result requires splitting and the mask is provided by a
5488   // SETCC, then split both nodes and its operands before legalization. This
5489   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5490   // and enables future optimizations (e.g. min/max pattern matching on X86).
5491 
5492   if (Mask.getOpcode() == ISD::SETCC) {
5493     EVT VT = N->getValueType(0);
5494 
5495     // Check if any splitting is required.
5496     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5497         TargetLowering::TypeSplitVector)
5498       return SDValue();
5499 
5500     SDValue MaskLo, MaskHi, Lo, Hi;
5501     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5502 
5503     SDValue Src0 = MLD->getSrc0();
5504     SDValue Src0Lo, Src0Hi;
5505     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5506 
5507     EVT LoVT, HiVT;
5508     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
5509 
5510     SDValue Chain = MLD->getChain();
5511     SDValue Ptr   = MLD->getBasePtr();
5512     EVT MemoryVT = MLD->getMemoryVT();
5513     unsigned Alignment = MLD->getOriginalAlignment();
5514 
5515     // if Alignment is equal to the vector size,
5516     // take the half of it for the second part
5517     unsigned SecondHalfAlignment =
5518       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
5519          Alignment/2 : Alignment;
5520 
5521     EVT LoMemVT, HiMemVT;
5522     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5523 
5524     MachineMemOperand *MMO = DAG.getMachineFunction().
5525     getMachineMemOperand(MLD->getPointerInfo(),
5526                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5527                          Alignment, MLD->getAAInfo(), MLD->getRanges());
5528 
5529     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
5530                            ISD::NON_EXTLOAD);
5531 
5532     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5533     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5534                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5535 
5536     MMO = DAG.getMachineFunction().
5537     getMachineMemOperand(MLD->getPointerInfo(),
5538                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
5539                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
5540 
5541     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
5542                            ISD::NON_EXTLOAD);
5543 
5544     AddToWorklist(Lo.getNode());
5545     AddToWorklist(Hi.getNode());
5546 
5547     // Build a factor node to remember that this load is independent of the
5548     // other one.
5549     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5550                         Hi.getValue(1));
5551 
5552     // Legalized the chain result - switch anything that used the old chain to
5553     // use the new one.
5554     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
5555 
5556     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5557 
5558     SDValue RetOps[] = { LoadRes, Chain };
5559     return DAG.getMergeValues(RetOps, DL);
5560   }
5561   return SDValue();
5562 }
5563 
// Combine a VSELECT node: abs canonicalization, operand simplification,
// pre-legalization splitting, and constant-condition folds.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition vector
  SDValue N1 = N->getOperand(1); // true value
  SDValue N2 = N->getOperand(2); // false value
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match "X >= 0 ? X : -X" (also X > -1), where -X is (sub 0, X).
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // Match the mirrored form "X <= 0 ? -X : X".
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      // Emit the branch-free abs sequence: Y = X >> (bits-1) (arithmetic,
      // so Y is all-ones for negative lanes); abs = (X + Y) ^ Y.
      EVT VT = LHS.getValueType();
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    // Split condition, true value, and false value, then select per half.
    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  return SDValue();
}
5651 
5652 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
5653   SDValue N0 = N->getOperand(0);
5654   SDValue N1 = N->getOperand(1);
5655   SDValue N2 = N->getOperand(2);
5656   SDValue N3 = N->getOperand(3);
5657   SDValue N4 = N->getOperand(4);
5658   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
5659 
5660   // fold select_cc lhs, rhs, x, x, cc -> x
5661   if (N2 == N3)
5662     return N2;
5663 
5664   // Determine if the condition we're dealing with is constant
5665   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
5666                                   CC, SDLoc(N), false)) {
5667     AddToWorklist(SCC.getNode());
5668 
5669     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
5670       if (!SCCC->isNullValue())
5671         return N2;    // cond always true -> true val
5672       else
5673         return N3;    // cond always false -> false val
5674     } else if (SCC->isUndef()) {
5675       // When the condition is UNDEF, just return the first operand. This is
5676       // coherent the DAG creation, no setcc node is created in this case
5677       return N2;
5678     } else if (SCC.getOpcode() == ISD::SETCC) {
5679       // Fold to a simpler select_cc
5680       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
5681                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
5682                          SCC.getOperand(2));
5683     }
5684   }
5685 
5686   // If we can fold this based on the true/false value, do so.
5687   if (SimplifySelectOps(N, N2, N3))
5688     return SDValue(N, 0);  // Don't revisit N.
5689 
5690   // fold select_cc into other things, such as min/max/abs
5691   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
5692 }
5693 
5694 SDValue DAGCombiner::visitSETCC(SDNode *N) {
5695   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
5696                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
5697                        SDLoc(N));
5698 }
5699 
5700 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
5701   SDValue LHS = N->getOperand(0);
5702   SDValue RHS = N->getOperand(1);
5703   SDValue Carry = N->getOperand(2);
5704   SDValue Cond = N->getOperand(3);
5705 
5706   // If Carry is false, fold to a regular SETCC.
5707   if (Carry.getOpcode() == ISD::CARRY_FALSE)
5708     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
5709 
5710   return SDValue();
5711 }
5712 
5713 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
5714 /// a build_vector of constants.
5715 /// This function is called by the DAGCombiner when visiting sext/zext/aext
5716 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
5717 /// Vector extends are not folded if operations are legal; this is to
5718 /// avoid introducing illegal build_vector dag nodes.
5719 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
5720                                          SelectionDAG &DAG, bool LegalTypes,
5721                                          bool LegalOperations) {
5722   unsigned Opcode = N->getOpcode();
5723   SDValue N0 = N->getOperand(0);
5724   EVT VT = N->getValueType(0);
5725 
5726   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
5727          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5728          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
5729          && "Expected EXTEND dag node in input!");
5730 
5731   // fold (sext c1) -> c1
5732   // fold (zext c1) -> c1
5733   // fold (aext c1) -> c1
5734   if (isa<ConstantSDNode>(N0))
5735     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
5736 
5737   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
5738   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
5739   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
5740   EVT SVT = VT.getScalarType();
5741   if (!(VT.isVector() &&
5742       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
5743       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
5744     return nullptr;
5745 
5746   // We can fold this node into a build_vector.
5747   unsigned VTBits = SVT.getSizeInBits();
5748   unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
5749   SmallVector<SDValue, 8> Elts;
5750   unsigned NumElts = VT.getVectorNumElements();
5751   SDLoc DL(N);
5752 
5753   for (unsigned i=0; i != NumElts; ++i) {
5754     SDValue Op = N0->getOperand(i);
5755     if (Op->isUndef()) {
5756       Elts.push_back(DAG.getUNDEF(SVT));
5757       continue;
5758     }
5759 
5760     SDLoc DL(Op);
5761     // Get the constant value and if needed trunc it to the size of the type.
5762     // Nodes like build_vector might have constants wider than the scalar type.
5763     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
5764     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
5765       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
5766     else
5767       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
5768   }
5769 
5770   return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
5771 }
5772 
5773 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
5774 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
5775 // transformation. Returns true if extension are possible and the above
5776 // mentioned transformation is profitable.
5777 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
5778                                     unsigned ExtOpc,
5779                                     SmallVectorImpl<SDNode *> &ExtendNodes,
5780                                     const TargetLowering &TLI) {
5781   bool HasCopyToRegUses = false;
5782   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
5783   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
5784                             UE = N0.getNode()->use_end();
5785        UI != UE; ++UI) {
5786     SDNode *User = *UI;
5787     if (User == N)
5788       continue;
5789     if (UI.getUse().getResNo() != N0.getResNo())
5790       continue;
5791     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
5792     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
5793       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
5794       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
5795         // Sign bits will be lost after a zext.
5796         return false;
5797       bool Add = false;
5798       for (unsigned i = 0; i != 2; ++i) {
5799         SDValue UseOp = User->getOperand(i);
5800         if (UseOp == N0)
5801           continue;
5802         if (!isa<ConstantSDNode>(UseOp))
5803           return false;
5804         Add = true;
5805       }
5806       if (Add)
5807         ExtendNodes.push_back(User);
5808       continue;
5809     }
5810     // If truncates aren't free and there are users we can't
5811     // extend, it isn't worthwhile.
5812     if (!isTruncFree)
5813       return false;
5814     // Remember if this value is live-out.
5815     if (User->getOpcode() == ISD::CopyToReg)
5816       HasCopyToRegUses = true;
5817   }
5818 
5819   if (HasCopyToRegUses) {
5820     bool BothLiveOut = false;
5821     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5822          UI != UE; ++UI) {
5823       SDUse &Use = UI.getUse();
5824       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
5825         BothLiveOut = true;
5826         break;
5827       }
5828     }
5829     if (BothLiveOut)
5830       // Both unextended and extended values are live out. There had better be
5831       // a good reason for the transformation.
5832       return ExtendNodes.size();
5833   }
5834   return true;
5835 }
5836 
5837 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
5838                                   SDValue Trunc, SDValue ExtLoad, SDLoc DL,
5839                                   ISD::NodeType ExtType) {
5840   // Extend SetCC uses if necessary.
5841   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
5842     SDNode *SetCC = SetCCs[i];
5843     SmallVector<SDValue, 4> Ops;
5844 
5845     for (unsigned j = 0; j != 2; ++j) {
5846       SDValue SOp = SetCC->getOperand(j);
5847       if (SOp == Trunc)
5848         Ops.push_back(ExtLoad);
5849       else
5850         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
5851     }
5852 
5853     Ops.push_back(SetCC->getOperand(2));
5854     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
5855   }
5856 }
5857 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
// Split an illegal-but-splittable (sext/zext (load x)) into multiple smaller
// extloads of a legal type, concatenated back together.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Bail on extending/indexed/volatile loads, non-vector or non-pow2 results,
  // or when the target doesn't want this transformation.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Make sure any setcc users of the load can be extended as well.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Halve both the source and destination types until the target supports the
  // extload, or the source can't be split any further.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extload per split chunk, advancing the pointer by Stride bytes.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of a chunk is limited by its byte offset from the base.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
        LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
        Align, LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
5953 
/// Combine a SIGN_EXTEND node.  Attempts, in order: constant folding,
/// collapsing sext(sext/aext), cancelling sext(trunc) via known sign bits,
/// forming sextloads (single, split-vector, or through and/or/xor), lowering
/// sext(setcc) patterns, and finally demoting to ZERO_EXTEND when the sign
/// bit of the operand is known to be zero.  Returns the replacement value,
/// SDValue(N, 0) when N was rewritten in place via CombineTo, or a null
/// SDValue when no combine applies.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (sext c1) -> c1' (also handles BUILD_VECTORs of constants).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Bring Op to the destination width first, then sign-extend in
      // register from the truncated type.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses the load can only be folded if every other use can
    // be rewritten to consume the extended value too.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the original load see a truncate of the sextload.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the last clause requires !LegalOperations, so this fold is
  // disabled entirely after operation legalization.  The zext counterpart
  // uses isOperationLegalOrCustom here instead of isOperationLegal — confirm
  // the asymmetry between the two visitors is intended.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the constant to match the widened operand.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                               N0.getOperand(0), N0.getOperand(1),
                               cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDLoc DL(N);
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), NegOne,
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;

    // Scalar fallback: materialize the select explicitly when SETCC on the
    // compared type is (or will be) legal.
    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC,
                             NegOne, DAG.getConstant(0, DL, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}
6176 
6177 // isTruncateOf - If N is a truncate of some other value, return true, record
6178 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
6179 // This function computes KnownZero to avoid a duplicated call to
6180 // computeKnownBits in the caller.
6181 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
6182                          APInt &KnownZero) {
6183   APInt KnownOne;
6184   if (N->getOpcode() == ISD::TRUNCATE) {
6185     Op = N->getOperand(0);
6186     DAG.computeKnownBits(Op, KnownZero, KnownOne);
6187     return true;
6188   }
6189 
6190   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
6191       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
6192     return false;
6193 
6194   SDValue Op0 = N->getOperand(0);
6195   SDValue Op1 = N->getOperand(1);
6196   assert(Op0.getValueType() == Op1.getValueType());
6197 
6198   if (isNullConstant(Op0))
6199     Op = Op1;
6200   else if (isNullConstant(Op1))
6201     Op = Op0;
6202   else
6203     return false;
6204 
6205   DAG.computeKnownBits(Op, KnownZero, KnownOne);
6206 
6207   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
6208     return false;
6209 
6210   return true;
6211 }
6212 
/// Combine a ZERO_EXTEND node.  Attempts, in order: constant folding,
/// collapsing zext(zext/aext), cancelling zext(trunc) via known zero bits,
/// rewriting zext(trunc) to an AND-mask, forming zextloads (single,
/// split-vector, or through and/or/xor), lowering zext(setcc), and hoisting
/// zext over shifts of already-zero-extended values.  Returns the
/// replacement value, SDValue(N, 0) when N was rewritten in place via
/// CombineTo, or a null SDValue when no combine applies.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1' (also handles BUILD_VECTORs of constants).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    // TruncatedBits = the bits discarded by the truncate that must already
    // be zero for the trunc/zext pair to be a no-op on the value.
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    // NOTE(review): this ReduceLoadWidth attempt duplicates the block
    // immediately above, which already returns on success, so this copy
    // appears unreachable — candidate for removal.
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT)) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
      }
    }

    // Otherwise mask after bringing the value to the destination width.
    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = N0.getOperand(0);
      if (SrcVT.bitsLT(VT)) {
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
        AddToWorklist(Op.getNode());
      } else if (SrcVT.bitsGT(VT)) {
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
        AddToWorklist(Op.getNode());
      }
      return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    // Zero-extend the mask to the destination width; the AND supplies the
    // zeroing the original zext would have done.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses the load can only be folded if every other use can
    // be rewritten to consume the extended value too.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the original load see a truncate of the zextload.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegalOrCustom(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        // Skip the transform if the and/load pair already matches as a
        // narrowing zextload; duplicating it would pessimize other users.
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          auto NarrowLoad = false;
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT, LoadedVT;
          if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
                               NarrowLoad))
            DoXform = false;
        }
        if (DoXform)
          DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
                                            ISD::ZERO_EXTEND, SetCCs, TLI);
      }
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Zero-extend the constant to match the widened operand.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // Nothing to do when SETCC already yields the type we would build.
      if (getSetCCResultType(N0VT) == N0.getValueType())
        return SDValue();

      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, DL, VT,
                           DAG.getSetCC(DL, VT, N0.getOperand(0),
                                         N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           VecOnes);

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1),
                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}
6514 
6515 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
6516   SDValue N0 = N->getOperand(0);
6517   EVT VT = N->getValueType(0);
6518 
6519   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6520                                               LegalOperations))
6521     return SDValue(Res, 0);
6522 
6523   // fold (aext (aext x)) -> (aext x)
6524   // fold (aext (zext x)) -> (zext x)
6525   // fold (aext (sext x)) -> (sext x)
6526   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
6527       N0.getOpcode() == ISD::ZERO_EXTEND ||
6528       N0.getOpcode() == ISD::SIGN_EXTEND)
6529     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
6530 
6531   // fold (aext (truncate (load x))) -> (aext (smaller load x))
6532   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
6533   if (N0.getOpcode() == ISD::TRUNCATE) {
6534     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6535       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6536       if (NarrowLoad.getNode() != N0.getNode()) {
6537         CombineTo(N0.getNode(), NarrowLoad);
6538         // CombineTo deleted the truncate, if needed, but not what's under it.
6539         AddToWorklist(oye);
6540       }
6541       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6542     }
6543   }
6544 
6545   // fold (aext (truncate x))
6546   if (N0.getOpcode() == ISD::TRUNCATE) {
6547     SDValue TruncOp = N0.getOperand(0);
6548     if (TruncOp.getValueType() == VT)
6549       return TruncOp; // x iff x size == zext size.
6550     if (TruncOp.getValueType().bitsGT(VT))
6551       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
6552     return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
6553   }
6554 
6555   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
6556   // if the trunc is not free.
6557   if (N0.getOpcode() == ISD::AND &&
6558       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6559       N0.getOperand(1).getOpcode() == ISD::Constant &&
6560       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6561                           N0.getValueType())) {
6562     SDValue X = N0.getOperand(0).getOperand(0);
6563     if (X.getValueType().bitsLT(VT)) {
6564       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
6565     } else if (X.getValueType().bitsGT(VT)) {
6566       X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
6567     }
6568     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6569     Mask = Mask.zext(VT.getSizeInBits());
6570     SDLoc DL(N);
6571     return DAG.getNode(ISD::AND, DL, VT,
6572                        X, DAG.getConstant(Mask, DL, VT));
6573   }
6574 
6575   // fold (aext (load x)) -> (aext (truncate (extload x)))
6576   // None of the supported targets knows how to perform load and any_ext
6577   // on vectors in one instruction.  We only perform this transformation on
6578   // scalars.
6579   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
6580       ISD::isUNINDEXEDLoad(N0.getNode()) &&
6581       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
6582     bool DoXform = true;
6583     SmallVector<SDNode*, 4> SetCCs;
6584     if (!N0.hasOneUse())
6585       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
6586     if (DoXform) {
6587       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6588       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
6589                                        LN0->getChain(),
6590                                        LN0->getBasePtr(), N0.getValueType(),
6591                                        LN0->getMemOperand());
6592       CombineTo(N, ExtLoad);
6593       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6594                                   N0.getValueType(), ExtLoad);
6595       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6596       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6597                       ISD::ANY_EXTEND);
6598       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6599     }
6600   }
6601 
6602   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
6603   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
6604   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
6605   if (N0.getOpcode() == ISD::LOAD &&
6606       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6607       N0.hasOneUse()) {
6608     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6609     ISD::LoadExtType ExtType = LN0->getExtensionType();
6610     EVT MemVT = LN0->getMemoryVT();
6611     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
6612       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
6613                                        VT, LN0->getChain(), LN0->getBasePtr(),
6614                                        MemVT, LN0->getMemOperand());
6615       CombineTo(N, ExtLoad);
6616       CombineTo(N0.getNode(),
6617                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6618                             N0.getValueType(), ExtLoad),
6619                 ExtLoad.getValue(1));
6620       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6621     }
6622   }
6623 
6624   if (N0.getOpcode() == ISD::SETCC) {
6625     // For vectors:
6626     // aext(setcc) -> vsetcc
6627     // aext(setcc) -> truncate(vsetcc)
6628     // aext(setcc) -> aext(vsetcc)
6629     // Only do this before legalize for now.
6630     if (VT.isVector() && !LegalOperations) {
6631       EVT N0VT = N0.getOperand(0).getValueType();
6632         // We know that the # elements of the results is the same as the
6633         // # elements of the compare (and the # elements of the compare result
6634         // for that matter).  Check to see that they are the same size.  If so,
6635         // we know that the element size of the sext'd result matches the
6636         // element size of the compare operands.
6637       if (VT.getSizeInBits() == N0VT.getSizeInBits())
6638         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6639                              N0.getOperand(1),
6640                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
6641       // If the desired elements are smaller or larger than the source
6642       // elements we can use a matching integer vector type and then
6643       // truncate/any extend
6644       else {
6645         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6646         SDValue VsetCC =
6647           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
6648                         N0.getOperand(1),
6649                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
6650         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
6651       }
6652     }
6653 
6654     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6655     SDLoc DL(N);
6656     if (SDValue SCC = SimplifySelectCC(
6657             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6658             DAG.getConstant(0, DL, VT),
6659             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6660       return SCC;
6661   }
6662 
6663   return SDValue();
6664 }
6665 
6666 /// See if the specified operand can be simplified with the knowledge that only
6667 /// the bits specified by Mask are used.  If so, return the simpler operand,
6668 /// otherwise return a null SDValue.
6669 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
6670   switch (V.getOpcode()) {
6671   default: break;
6672   case ISD::Constant: {
6673     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
6674     assert(CV && "Const value should be ConstSDNode.");
6675     const APInt &CVal = CV->getAPIntValue();
6676     APInt NewVal = CVal & Mask;
6677     if (NewVal != CVal)
6678       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
6679     break;
6680   }
6681   case ISD::OR:
6682   case ISD::XOR:
6683     // If the LHS or RHS don't contribute bits to the or, drop them.
6684     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
6685       return V.getOperand(1);
6686     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
6687       return V.getOperand(0);
6688     break;
6689   case ISD::SRL:
6690     // Only look at single-use SRLs.
6691     if (!V.getNode()->hasOneUse())
6692       break;
6693     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
6694       // See if we can recursively simplify the LHS.
6695       unsigned Amt = RHSC->getZExtValue();
6696 
6697       // Watch out for shift count overflow though.
6698       if (Amt >= Mask.getBitWidth()) break;
6699       APInt NewMask = Mask << Amt;
6700       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
6701         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
6702                            SimplifyLHS, V.getOperand(1));
6703     }
6704   }
6705   return SDValue();
6706 }
6707 
6708 /// If the result of a wider load is shifted to right of N  bits and then
6709 /// truncated to a narrower type and where N is a multiple of number of bits of
6710 /// the narrower type, transform it to a narrower load from address + N / num of
6711 /// bits of new type. If the result is to be extended, also fold the extension
6712 /// to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  // N is one of SIGN_EXTEND_INREG, SRL, or TRUNCATE; Opc selects which of the
  // three narrowing patterns below we are matching.
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    // Treat the SRL node itself as the value being narrowed, so the SRL
    // handling below sees it as N0.
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    // The useful bits are the low (VT-width - shift-amount) bits.
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  // The narrowed result will be produced by an extending load of ExtVT; make
  // sure the target supports it once operations have been legalized.
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  // If the value comes from (srl (load), C), remember C so the narrowed load
  // can be taken at a byte offset instead.
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }
      // Note: if the shift amount is NOT a multiple of EVTBits, N0 is still
      // the SRL here, so the isa<LoadSDNode> check below rejects it.

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Give the target a chance to veto the narrowing.
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Convert the bit offset into a byte offset and a (possibly reduced)
  // alignment for the new, narrower load.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               &Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                        LN0->getPointerInfo().getWithOffset(PtrOff),
                        LN0->isVolatile(), LN0->isNonTemporal(),
                        LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}
6884 
/// Combines for ISD::SIGN_EXTEND_INREG: N0 is the value, N1 is a VTSDNode
/// giving the type whose sign bit is being replicated.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately shadows the 'EVT' type name; within this
  // function 'EVT' is the in-register type being sign-extended from.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    // Replace both the sext_inreg and the original load with the sextload,
    // rewiring the load's chain users to the new load's chain.
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
6991 
6992 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
6993   SDValue N0 = N->getOperand(0);
6994   EVT VT = N->getValueType(0);
6995 
6996   if (N0.isUndef())
6997     return DAG.getUNDEF(VT);
6998 
6999   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7000                                               LegalOperations))
7001     return SDValue(Res, 0);
7002 
7003   return SDValue();
7004 }
7005 
7006 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
7007   SDValue N0 = N->getOperand(0);
7008   EVT VT = N->getValueType(0);
7009 
7010   if (N0.isUndef())
7011     return DAG.getUNDEF(VT);
7012 
7013   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7014                                               LegalOperations))
7015     return SDValue(Res, 0);
7016 
7017   return SDValue();
7018 }
7019 
/// Combines for ISD::TRUNCATE.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    // How many truncated-width elements fit in one extracted element.
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // On big-endian targets the low (kept) part is the last sub-element of
      // the wide element, hence the +(SizeRatio-1) offset.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th build_vector operand; the others are the
      // high parts that the truncate discards.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    if (SDValue Shorter =
            GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                                     VT.getSizeInBits())))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Keep chain users of the old load working off the new load's chain.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan the concat operands, remembering the single non-undef one (if any)
    // and the truncated type each operand slot maps to.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (X.getOpcode() != ISD::UNDEF) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      // NOTE(review): element 0 is only the low part on little-endian
      // layouts; presumably a later check or target constraint covers
      // big-endian here — verify against callers.
      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(0, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
7228 
7229 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
7230   SDValue Elt = N->getOperand(i);
7231   if (Elt.getOpcode() != ISD::MERGE_VALUES)
7232     return Elt.getNode();
7233   return Elt.getOperand(Elt.getResNo()).getNode();
7234 }
7235 
7236 /// build_pair (load, load) -> load
7237 /// if load locations are consecutive.
7238 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
7239   assert(N->getOpcode() == ISD::BUILD_PAIR);
7240 
7241   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
7242   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
7243   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
7244       LD1->getAddressSpace() != LD2->getAddressSpace())
7245     return SDValue();
7246   EVT LD1VT = LD1->getValueType(0);
7247 
7248   if (ISD::isNON_EXTLoad(LD2) &&
7249       LD2->hasOneUse() &&
7250       // If both are volatile this would reduce the number of volatile loads.
7251       // If one is volatile it might be ok, but play conservative and bail out.
7252       !LD1->isVolatile() &&
7253       !LD2->isVolatile() &&
7254       DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
7255     unsigned Align = LD1->getAlignment();
7256     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
7257         VT.getTypeForEVT(*DAG.getContext()));
7258 
7259     if (NewAlign <= Align &&
7260         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
7261       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
7262                          LD1->getBasePtr(), LD1->getPointerInfo(),
7263                          false, false, false, Align);
7264   }
7265 
7266   return SDValue();
7267 }
7268 
7269 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
7270   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
7271   // and Lo parts; on big-endian machines it doesn't.
7272   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
7273 }
7274 
/// Visit an ISD::BITCAST node and try the standard bitcast folds: constant
/// folding of BUILD_VECTOR inputs, collapsing cast-of-cast, moving the cast
/// through a load, turning fneg/fabs/fcopysign of FP values into integer
/// bit manipulation, merging BUILD_PAIR of consecutive loads, and removing
/// double bitcasts around vector shuffles. Returns the replacement value or
/// an empty SDValue if no fold applies.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned Align = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));
    unsigned OrigAlign = LN0->getAlignment();

    // Only fold when the new type's ABI alignment requirement does not
    // exceed the alignment the original load already guarantees.
    if (Align <= OrigAlign) {
      SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
                                 LN0->getBasePtr(), LN0->getPointerInfo(),
                                 LN0->isVolatile(), LN0->isNonTemporal(),
                                 LN0->isInvariant(), OrigAlign,
                                 LN0->getAAInfo());
      // Redirect users of the old load's chain to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
                                  N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 is a pair of doubles, so the sign bit has to be flipped (or
    // masked) in each 64-bit half via a BUILD_PAIR of the flip bits.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                              IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128 variant: XOR the bitcast halves, isolate the sign bit of
      // the Hi half, and flip that bit in both halves of the constant.
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: (or (and X, signbit), (and cst, ~signbit)).
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                                VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
      return SDValue();
    };

    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // VT may have more (but never fewer) elements than the shuffle's type;
    // widen the mask by replicating each entry MaskScale times.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the scaled mask is not legal as-is, try the commuted form before
    // giving up.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
7520 
7521 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
7522   EVT VT = N->getValueType(0);
7523   return CombineConsecutiveLoads(N, VT);
7524 }
7525 
7526 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
7527 /// operands. DstEltVT indicates the destination element value type.
7528 SDValue DAGCombiner::
7529 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
7530   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
7531 
7532   // If this is already the right type, we're done.
7533   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
7534 
7535   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
7536   unsigned DstBitSize = DstEltVT.getSizeInBits();
7537 
7538   // If this is a conversion of N elements of one type to N elements of another
7539   // type, convert each element.  This handles FP<->INT cases.
7540   if (SrcBitSize == DstBitSize) {
7541     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7542                               BV->getValueType(0).getVectorNumElements());
7543 
7544     // Due to the FP element handling below calling this routine recursively,
7545     // we can end up with a scalar-to-vector node here.
7546     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
7547       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
7548                          DAG.getNode(ISD::BITCAST, SDLoc(BV),
7549                                      DstEltVT, BV->getOperand(0)));
7550 
7551     SmallVector<SDValue, 8> Ops;
7552     for (SDValue Op : BV->op_values()) {
7553       // If the vector element type is not legal, the BUILD_VECTOR operands
7554       // are promoted and implicitly truncated.  Make that explicit here.
7555       if (Op.getValueType() != SrcEltVT)
7556         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
7557       Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
7558                                 DstEltVT, Op));
7559       AddToWorklist(Ops.back().getNode());
7560     }
7561     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
7562   }
7563 
7564   // Otherwise, we're growing or shrinking the elements.  To avoid having to
7565   // handle annoying details of growing/shrinking FP values, we convert them to
7566   // int first.
7567   if (SrcEltVT.isFloatingPoint()) {
7568     // Convert the input float vector to a int vector where the elements are the
7569     // same sizes.
7570     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
7571     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
7572     SrcEltVT = IntVT;
7573   }
7574 
7575   // Now we know the input is an integer vector.  If the output is a FP type,
7576   // convert to integer first, then to FP of the right size.
7577   if (DstEltVT.isFloatingPoint()) {
7578     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
7579     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
7580 
7581     // Next, convert to FP elements of the same size.
7582     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
7583   }
7584 
7585   SDLoc DL(BV);
7586 
7587   // Okay, we know the src/dst types are both integers of differing types.
7588   // Handling growing first.
7589   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
7590   if (SrcBitSize < DstBitSize) {
7591     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
7592 
7593     SmallVector<SDValue, 8> Ops;
7594     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
7595          i += NumInputsPerOutput) {
7596       bool isLE = DAG.getDataLayout().isLittleEndian();
7597       APInt NewBits = APInt(DstBitSize, 0);
7598       bool EltIsUndef = true;
7599       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
7600         // Shift the previously computed bits over.
7601         NewBits <<= SrcBitSize;
7602         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
7603         if (Op.isUndef()) continue;
7604         EltIsUndef = false;
7605 
7606         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
7607                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
7608       }
7609 
7610       if (EltIsUndef)
7611         Ops.push_back(DAG.getUNDEF(DstEltVT));
7612       else
7613         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
7614     }
7615 
7616     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
7617     return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
7618   }
7619 
7620   // Finally, this must be the case where we are shrinking elements: each input
7621   // turns into multiple outputs.
7622   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
7623   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7624                             NumOutputsPerInput*BV->getNumOperands());
7625   SmallVector<SDValue, 8> Ops;
7626 
7627   for (const SDValue &Op : BV->op_values()) {
7628     if (Op.isUndef()) {
7629       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
7630       continue;
7631     }
7632 
7633     APInt OpVal = cast<ConstantSDNode>(Op)->
7634                   getAPIntValue().zextOrTrunc(SrcBitSize);
7635 
7636     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
7637       APInt ThisVal = OpVal.trunc(DstBitSize);
7638       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
7639       OpVal = OpVal.lshr(DstBitSize);
7640     }
7641 
7642     // For big endian targets, swap the order of the pieces of each element.
7643     if (DAG.getDataLayout().isBigEndian())
7644       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
7645   }
7646 
7647   return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
7648 }
7649 
/// Try to perform FMA combining on a given FADD node.
/// Matches (fadd ...) patterns containing FMUL (optionally behind FP_EXTEND
/// or an existing fused op) and rewrites them as FMAD/FMA nodes. Returns the
/// fused node, or an empty SDValue if no pattern applies. The order of the
/// folds below is significant: earlier, simpler patterns take priority.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  // Fusion without intermediate rounding is only allowed when the user opted
  // in via -fp-contract=fast or unsafe-fp-math.
  const TargetOptions &Options = DAG.getTarget().Options;
  bool AllowFusion =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && N0.getOpcode() == ISD::FMUL &&
      N1.getOpcode() == ISD::FMUL) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (N0.getOpcode() == ISD::FMUL &&
      (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FMUL &&
      (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (AllowFusion && LookThroughFPExt) {
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)), N1);
    }

    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if ((AllowFusion || HasFMAD)  && Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    if (N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    if (N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }

    if (AllowFusion && LookThroughFPExt) {
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
      auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (N020.getOpcode() == ISD::FMUL)
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                        N020.getOperand(0), N020.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (N002.getOpcode() == ISD::FMUL)
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                        N002.getOperand(0), N002.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode) {
        SDValue N12 = N1.getOperand(2);
        if (N12.getOpcode() == ISD::FP_EXTEND) {
          SDValue N120 = N12.getOperand(0);
          if (N120.getOpcode() == ISD::FMUL)
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                        N120.getOperand(0), N120.getOperand(1),
                                        N0);
        }
      }

      // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == PreferredFusedOpcode) {
          SDValue N102 = N10.getOperand(2);
          if (N102.getOpcode() == ISD::FMUL)
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                        N102.getOperand(0), N102.getOperand(1),
                                        N0);
        }
      }
    }
  }

  return SDValue();
}
7832 
7833 /// Try to perform FMA combining on a given FSUB node.
7834 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
7835   SDValue N0 = N->getOperand(0);
7836   SDValue N1 = N->getOperand(1);
7837   EVT VT = N->getValueType(0);
7838   SDLoc SL(N);
7839 
7840   const TargetOptions &Options = DAG.getTarget().Options;
7841   bool AllowFusion =
7842       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
7843 
7844   // Floating-point multiply-add with intermediate rounding.
7845   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
7846 
7847   // Floating-point multiply-add without intermediate rounding.
7848   bool HasFMA =
7849       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
7850       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
7851 
7852   // No valid opcode, do not combine.
7853   if (!HasFMAD && !HasFMA)
7854     return SDValue();
7855 
7856   // Always prefer FMAD to FMA for precision.
7857   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
7858   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
7859   bool LookThroughFPExt = TLI.isFPExtFree(VT);
7860 
7861   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
7862   if (N0.getOpcode() == ISD::FMUL &&
7863       (Aggressive || N0->hasOneUse())) {
7864     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7865                        N0.getOperand(0), N0.getOperand(1),
7866                        DAG.getNode(ISD::FNEG, SL, VT, N1));
7867   }
7868 
7869   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
7870   // Note: Commutes FSUB operands.
7871   if (N1.getOpcode() == ISD::FMUL &&
7872       (Aggressive || N1->hasOneUse()))
7873     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7874                        DAG.getNode(ISD::FNEG, SL, VT,
7875                                    N1.getOperand(0)),
7876                        N1.getOperand(1), N0);
7877 
7878   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
7879   if (N0.getOpcode() == ISD::FNEG &&
7880       N0.getOperand(0).getOpcode() == ISD::FMUL &&
7881       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
7882     SDValue N00 = N0.getOperand(0).getOperand(0);
7883     SDValue N01 = N0.getOperand(0).getOperand(1);
7884     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7885                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
7886                        DAG.getNode(ISD::FNEG, SL, VT, N1));
7887   }
7888 
7889   // Look through FP_EXTEND nodes to do more combining.
7890   if (AllowFusion && LookThroughFPExt) {
7891     // fold (fsub (fpext (fmul x, y)), z)
7892     //   -> (fma (fpext x), (fpext y), (fneg z))
7893     if (N0.getOpcode() == ISD::FP_EXTEND) {
7894       SDValue N00 = N0.getOperand(0);
7895       if (N00.getOpcode() == ISD::FMUL)
7896         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7897                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7898                                        N00.getOperand(0)),
7899                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7900                                        N00.getOperand(1)),
7901                            DAG.getNode(ISD::FNEG, SL, VT, N1));
7902     }
7903 
7904     // fold (fsub x, (fpext (fmul y, z)))
7905     //   -> (fma (fneg (fpext y)), (fpext z), x)
7906     // Note: Commutes FSUB operands.
7907     if (N1.getOpcode() == ISD::FP_EXTEND) {
7908       SDValue N10 = N1.getOperand(0);
7909       if (N10.getOpcode() == ISD::FMUL)
7910         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7911                            DAG.getNode(ISD::FNEG, SL, VT,
7912                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
7913                                                    N10.getOperand(0))),
7914                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7915                                        N10.getOperand(1)),
7916                            N0);
7917     }
7918 
7919     // fold (fsub (fpext (fneg (fmul, x, y))), z)
7920     //   -> (fneg (fma (fpext x), (fpext y), z))
7921     // Note: This could be removed with appropriate canonicalization of the
7922     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
7923     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
7924     // from implementing the canonicalization in visitFSUB.
7925     if (N0.getOpcode() == ISD::FP_EXTEND) {
7926       SDValue N00 = N0.getOperand(0);
7927       if (N00.getOpcode() == ISD::FNEG) {
7928         SDValue N000 = N00.getOperand(0);
7929         if (N000.getOpcode() == ISD::FMUL) {
7930           return DAG.getNode(ISD::FNEG, SL, VT,
7931                              DAG.getNode(PreferredFusedOpcode, SL, VT,
7932                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
7933                                                      N000.getOperand(0)),
7934                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
7935                                                      N000.getOperand(1)),
7936                                          N1));
7937         }
7938       }
7939     }
7940 
7941     // fold (fsub (fneg (fpext (fmul, x, y))), z)
7942     //   -> (fneg (fma (fpext x)), (fpext y), z)
7943     // Note: This could be removed with appropriate canonicalization of the
7944     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
7945     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
7946     // from implementing the canonicalization in visitFSUB.
7947     if (N0.getOpcode() == ISD::FNEG) {
7948       SDValue N00 = N0.getOperand(0);
7949       if (N00.getOpcode() == ISD::FP_EXTEND) {
7950         SDValue N000 = N00.getOperand(0);
7951         if (N000.getOpcode() == ISD::FMUL) {
7952           return DAG.getNode(ISD::FNEG, SL, VT,
7953                              DAG.getNode(PreferredFusedOpcode, SL, VT,
7954                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
7955                                                      N000.getOperand(0)),
7956                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
7957                                                      N000.getOperand(1)),
7958                                          N1));
7959         }
7960       }
7961     }
7962 
7963   }
7964 
7965   // More folding opportunities when target permits.
7966   if ((AllowFusion || HasFMAD) && Aggressive) {
7967     // fold (fsub (fma x, y, (fmul u, v)), z)
7968     //   -> (fma x, y (fma u, v, (fneg z)))
7969     if (N0.getOpcode() == PreferredFusedOpcode &&
7970         N0.getOperand(2).getOpcode() == ISD::FMUL) {
7971       return DAG.getNode(PreferredFusedOpcode, SL, VT,
7972                          N0.getOperand(0), N0.getOperand(1),
7973                          DAG.getNode(PreferredFusedOpcode, SL, VT,
7974                                      N0.getOperand(2).getOperand(0),
7975                                      N0.getOperand(2).getOperand(1),
7976                                      DAG.getNode(ISD::FNEG, SL, VT,
7977                                                  N1)));
7978     }
7979 
7980     // fold (fsub x, (fma y, z, (fmul u, v)))
7981     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
7982     if (N1.getOpcode() == PreferredFusedOpcode &&
7983         N1.getOperand(2).getOpcode() == ISD::FMUL) {
7984       SDValue N20 = N1.getOperand(2).getOperand(0);
7985       SDValue N21 = N1.getOperand(2).getOperand(1);
7986       return DAG.getNode(PreferredFusedOpcode, SL, VT,
7987                          DAG.getNode(ISD::FNEG, SL, VT,
7988                                      N1.getOperand(0)),
7989                          N1.getOperand(1),
7990                          DAG.getNode(PreferredFusedOpcode, SL, VT,
7991                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
7992 
7993                                      N21, N0));
7994     }
7995 
7996     if (AllowFusion && LookThroughFPExt) {
7997       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
7998       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
7999       if (N0.getOpcode() == PreferredFusedOpcode) {
8000         SDValue N02 = N0.getOperand(2);
8001         if (N02.getOpcode() == ISD::FP_EXTEND) {
8002           SDValue N020 = N02.getOperand(0);
8003           if (N020.getOpcode() == ISD::FMUL)
8004             return DAG.getNode(PreferredFusedOpcode, SL, VT,
8005                                N0.getOperand(0), N0.getOperand(1),
8006                                DAG.getNode(PreferredFusedOpcode, SL, VT,
8007                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8008                                                        N020.getOperand(0)),
8009                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8010                                                        N020.getOperand(1)),
8011                                            DAG.getNode(ISD::FNEG, SL, VT,
8012                                                        N1)));
8013         }
8014       }
8015 
8016       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
8017       //   -> (fma (fpext x), (fpext y),
8018       //           (fma (fpext u), (fpext v), (fneg z)))
8019       // FIXME: This turns two single-precision and one double-precision
8020       // operation into two double-precision operations, which might not be
8021       // interesting for all targets, especially GPUs.
8022       if (N0.getOpcode() == ISD::FP_EXTEND) {
8023         SDValue N00 = N0.getOperand(0);
8024         if (N00.getOpcode() == PreferredFusedOpcode) {
8025           SDValue N002 = N00.getOperand(2);
8026           if (N002.getOpcode() == ISD::FMUL)
8027             return DAG.getNode(PreferredFusedOpcode, SL, VT,
8028                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
8029                                            N00.getOperand(0)),
8030                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
8031                                            N00.getOperand(1)),
8032                                DAG.getNode(PreferredFusedOpcode, SL, VT,
8033                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8034                                                        N002.getOperand(0)),
8035                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8036                                                        N002.getOperand(1)),
8037                                            DAG.getNode(ISD::FNEG, SL, VT,
8038                                                        N1)));
8039         }
8040       }
8041 
8042       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
8043       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
8044       if (N1.getOpcode() == PreferredFusedOpcode &&
8045         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
8046         SDValue N120 = N1.getOperand(2).getOperand(0);
8047         if (N120.getOpcode() == ISD::FMUL) {
8048           SDValue N1200 = N120.getOperand(0);
8049           SDValue N1201 = N120.getOperand(1);
8050           return DAG.getNode(PreferredFusedOpcode, SL, VT,
8051                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
8052                              N1.getOperand(1),
8053                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8054                                          DAG.getNode(ISD::FNEG, SL, VT,
8055                                              DAG.getNode(ISD::FP_EXTEND, SL,
8056                                                          VT, N1200)),
8057                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8058                                                      N1201),
8059                                          N0));
8060         }
8061       }
8062 
8063       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
8064       //   -> (fma (fneg (fpext y)), (fpext z),
8065       //           (fma (fneg (fpext u)), (fpext v), x))
8066       // FIXME: This turns two single-precision and one double-precision
8067       // operation into two double-precision operations, which might not be
8068       // interesting for all targets, especially GPUs.
8069       if (N1.getOpcode() == ISD::FP_EXTEND &&
8070         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
8071         SDValue N100 = N1.getOperand(0).getOperand(0);
8072         SDValue N101 = N1.getOperand(0).getOperand(1);
8073         SDValue N102 = N1.getOperand(0).getOperand(2);
8074         if (N102.getOpcode() == ISD::FMUL) {
8075           SDValue N1020 = N102.getOperand(0);
8076           SDValue N1021 = N102.getOperand(1);
8077           return DAG.getNode(PreferredFusedOpcode, SL, VT,
8078                              DAG.getNode(ISD::FNEG, SL, VT,
8079                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8080                                                      N100)),
8081                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
8082                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8083                                          DAG.getNode(ISD::FNEG, SL, VT,
8084                                              DAG.getNode(ISD::FP_EXTEND, SL,
8085                                                          VT, N1020)),
8086                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8087                                                      N1021),
8088                                          N0));
8089         }
8090       }
8091     }
8092   }
8093 
8094   return SDValue();
8095 }
8096 
8097 /// Try to perform FMA combining on a given FMUL node.
8098 SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
8099   SDValue N0 = N->getOperand(0);
8100   SDValue N1 = N->getOperand(1);
8101   EVT VT = N->getValueType(0);
8102   SDLoc SL(N);
8103 
8104   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
8105 
8106   const TargetOptions &Options = DAG.getTarget().Options;
8107   bool AllowFusion =
8108       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8109 
8110   // Floating-point multiply-add with intermediate rounding.
8111   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8112 
8113   // Floating-point multiply-add without intermediate rounding.
8114   bool HasFMA =
8115       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8116       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8117 
8118   // No valid opcode, do not combine.
8119   if (!HasFMAD && !HasFMA)
8120     return SDValue();
8121 
8122   // Always prefer FMAD to FMA for precision.
8123   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8124   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8125 
8126   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
8127   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
8128   auto FuseFADD = [&](SDValue X, SDValue Y) {
8129     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
8130       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8131       if (XC1 && XC1->isExactlyValue(+1.0))
8132         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8133       if (XC1 && XC1->isExactlyValue(-1.0))
8134         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8135                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8136     }
8137     return SDValue();
8138   };
8139 
8140   if (SDValue FMA = FuseFADD(N0, N1))
8141     return FMA;
8142   if (SDValue FMA = FuseFADD(N1, N0))
8143     return FMA;
8144 
8145   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
8146   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
8147   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
8148   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
8149   auto FuseFSUB = [&](SDValue X, SDValue Y) {
8150     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
8151       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
8152       if (XC0 && XC0->isExactlyValue(+1.0))
8153         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8154                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8155                            Y);
8156       if (XC0 && XC0->isExactlyValue(-1.0))
8157         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8158                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8159                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8160 
8161       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8162       if (XC1 && XC1->isExactlyValue(+1.0))
8163         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8164                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8165       if (XC1 && XC1->isExactlyValue(-1.0))
8166         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8167     }
8168     return SDValue();
8169   };
8170 
8171   if (SDValue FMA = FuseFSUB(N0, N1))
8172     return FMA;
8173   if (SDValue FMA = FuseFSUB(N1, N0))
8174     return FMA;
8175 
8176   return SDValue();
8177 }
8178 
/// Combine an ISD::FADD node: constant-fold, canonicalize constants to the
/// RHS, rewrite (fadd A, (fneg B)) into FSUB when negation is free, apply
/// unsafe-math reassociations, and finally attempt FADD -> FMA fusion.
/// Returns the replacement value, or an empty SDValue if nothing folded.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // True when the operand is a constant FP or a build_vector of constant FPs.
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Fast-math flags of this node; propagated to every node created below.
  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // NOTE(review): the == 2 result presumably means the negated form is
  // strictly profitable (not merely possible) — confirm against
  // isNegatibleForFree's contract before relaxing this check.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
      if (N1C->isZero())
        return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    // Reassociate so the two constants can fold together; requires the inner
    // FADD to have a single use so no other user sees the un-reassociated form.
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
                                     Flags),
                         Flags);

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        // Constant-ness of the FMUL's two operands.
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        // Mirror of the block above with the FMUL on the RHS.
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
8327 
8328 SDValue DAGCombiner::visitFSUB(SDNode *N) {
8329   SDValue N0 = N->getOperand(0);
8330   SDValue N1 = N->getOperand(1);
8331   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
8332   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8333   EVT VT = N->getValueType(0);
8334   SDLoc dl(N);
8335   const TargetOptions &Options = DAG.getTarget().Options;
8336   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8337 
8338   // fold vector ops
8339   if (VT.isVector())
8340     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8341       return FoldedVOp;
8342 
8343   // fold (fsub c1, c2) -> c1-c2
8344   if (N0CFP && N1CFP)
8345     return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
8346 
8347   // fold (fsub A, (fneg B)) -> (fadd A, B)
8348   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8349     return DAG.getNode(ISD::FADD, dl, VT, N0,
8350                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8351 
8352   // If 'unsafe math' is enabled, fold lots of things.
8353   if (Options.UnsafeFPMath) {
8354     // (fsub A, 0) -> A
8355     if (N1CFP && N1CFP->isZero())
8356       return N0;
8357 
8358     // (fsub 0, B) -> -B
8359     if (N0CFP && N0CFP->isZero()) {
8360       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8361         return GetNegatedExpression(N1, DAG, LegalOperations);
8362       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8363         return DAG.getNode(ISD::FNEG, dl, VT, N1);
8364     }
8365 
8366     // (fsub x, x) -> 0.0
8367     if (N0 == N1)
8368       return DAG.getConstantFP(0.0f, dl, VT);
8369 
8370     // (fsub x, (fadd x, y)) -> (fneg y)
8371     // (fsub x, (fadd y, x)) -> (fneg y)
8372     if (N1.getOpcode() == ISD::FADD) {
8373       SDValue N10 = N1->getOperand(0);
8374       SDValue N11 = N1->getOperand(1);
8375 
8376       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
8377         return GetNegatedExpression(N11, DAG, LegalOperations);
8378 
8379       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
8380         return GetNegatedExpression(N10, DAG, LegalOperations);
8381     }
8382   }
8383 
8384   // FSUB -> FMA combines:
8385   if (SDValue Fused = visitFSUBForFMACombine(N)) {
8386     AddToWorklist(Fused.getNode());
8387     return Fused;
8388   }
8389 
8390   return SDValue();
8391 }
8392 
/// Combine an ISD::FMUL node: constant-fold, canonicalize constants to the
/// RHS, simplify multiplications by 1.0/2.0/-1.0, apply unsafe-math
/// reassociations, cancel double negations, and finally attempt
/// FMUL -> FMA fusion.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Scalar (or splat) FP constant operands, if any.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Fast-math flags of this node; propagated to nodes created below.
  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // Unsafe: ignores the sign of zero and NaN/Inf inputs.
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD &&
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
8495 
8496 SDValue DAGCombiner::visitFMA(SDNode *N) {
8497   SDValue N0 = N->getOperand(0);
8498   SDValue N1 = N->getOperand(1);
8499   SDValue N2 = N->getOperand(2);
8500   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8501   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8502   EVT VT = N->getValueType(0);
8503   SDLoc dl(N);
8504   const TargetOptions &Options = DAG.getTarget().Options;
8505 
8506   // Constant fold FMA.
8507   if (isa<ConstantFPSDNode>(N0) &&
8508       isa<ConstantFPSDNode>(N1) &&
8509       isa<ConstantFPSDNode>(N2)) {
8510     return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
8511   }
8512 
8513   if (Options.UnsafeFPMath) {
8514     if (N0CFP && N0CFP->isZero())
8515       return N2;
8516     if (N1CFP && N1CFP->isZero())
8517       return N2;
8518   }
8519   // TODO: The FMA node should have flags that propagate to these nodes.
8520   if (N0CFP && N0CFP->isExactlyValue(1.0))
8521     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
8522   if (N1CFP && N1CFP->isExactlyValue(1.0))
8523     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
8524 
8525   // Canonicalize (fma c, x, y) -> (fma x, c, y)
8526   if (isConstantFPBuildVectorOrConstantFP(N0) &&
8527      !isConstantFPBuildVectorOrConstantFP(N1))
8528     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
8529 
8530   // TODO: FMA nodes should have flags that propagate to the created nodes.
8531   // For now, create a Flags object for use with all unsafe math transforms.
8532   SDNodeFlags Flags;
8533   Flags.setUnsafeAlgebra(true);
8534 
8535   if (Options.UnsafeFPMath) {
8536     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
8537     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
8538         isConstantFPBuildVectorOrConstantFP(N1) &&
8539         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
8540       return DAG.getNode(ISD::FMUL, dl, VT, N0,
8541                          DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
8542                                      &Flags), &Flags);
8543     }
8544 
8545     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
8546     if (N0.getOpcode() == ISD::FMUL &&
8547         isConstantFPBuildVectorOrConstantFP(N1) &&
8548         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
8549       return DAG.getNode(ISD::FMA, dl, VT,
8550                          N0.getOperand(0),
8551                          DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
8552                                      &Flags),
8553                          N2);
8554     }
8555   }
8556 
8557   // (fma x, 1, y) -> (fadd x, y)
8558   // (fma x, -1, y) -> (fadd (fneg x), y)
8559   if (N1CFP) {
8560     if (N1CFP->isExactlyValue(1.0))
8561       // TODO: The FMA node should have flags that propagate to this node.
8562       return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
8563 
8564     if (N1CFP->isExactlyValue(-1.0) &&
8565         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
8566       SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
8567       AddToWorklist(RHSNeg.getNode());
8568       // TODO: The FMA node should have flags that propagate to this node.
8569       return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
8570     }
8571   }
8572 
8573   if (Options.UnsafeFPMath) {
8574     // (fma x, c, x) -> (fmul x, (c+1))
8575     if (N1CFP && N0 == N2) {
8576     return DAG.getNode(ISD::FMUL, dl, VT, N0,
8577                          DAG.getNode(ISD::FADD, dl, VT,
8578                                      N1, DAG.getConstantFP(1.0, dl, VT),
8579                                      &Flags), &Flags);
8580     }
8581 
8582     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
8583     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
8584       return DAG.getNode(ISD::FMUL, dl, VT, N0,
8585                          DAG.getNode(ISD::FADD, dl, VT,
8586                                      N1, DAG.getConstantFP(-1.0, dl, VT),
8587                                      &Flags), &Flags);
8588     }
8589   }
8590 
8591   return SDValue();
8592 }
8593 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different target
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // The transform requires either global unsafe math or per-node
  // allow-reciprocal fast-math flags.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags *Flags = N->getFlags();
  if (!UnsafeMath && !Flags->hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  // Build the single shared reciprocal: 1.0 / Divisor.
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  // N itself is one of the rewritten users, so report it as replaced.
  return SDValue(N, 0);  // N was replaced.
}
8657 
/// Combine an FDIV node. Folds tried, in order: vector op simplification,
/// two-constant folding, (under unsafe math) divide-by-constant ->
/// multiply-by-reciprocal, rsqrt-estimate formation for divisors involving
/// FSQRT, reciprocal-estimate formation, double-negation elimination, and
/// repeated-divisor reciprocal CSE.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  // Re-emitting the node with two constant operands lets getNode's constant
  // folder evaluate the division.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      APFloat N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpext(sqrt(y)) -> x * fpext(rsqrt(y))
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpround(sqrt(y)) -> x * fpround(rsqrt(y)); reuse the original
      // truncation flag operand of the FP_ROUND.
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // Last: share one reciprocal among several divisions by the same divisor.
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
8767 
8768 SDValue DAGCombiner::visitFREM(SDNode *N) {
8769   SDValue N0 = N->getOperand(0);
8770   SDValue N1 = N->getOperand(1);
8771   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8772   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8773   EVT VT = N->getValueType(0);
8774 
8775   // fold (frem c1, c2) -> fmod(c1,c2)
8776   if (N0CFP && N1CFP)
8777     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
8778                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
8779 
8780   return SDValue();
8781 }
8782 
8783 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
8784   if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
8785     return SDValue();
8786 
8787   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
8788   // For now, create a Flags object for use with all unsafe math transforms.
8789   SDNodeFlags Flags;
8790   Flags.setUnsafeAlgebra(true);
8791 
8792   // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
8793   SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
8794   if (!RV)
8795     return SDValue();
8796 
8797   EVT VT = RV.getValueType();
8798   SDLoc DL(N);
8799   RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
8800   AddToWorklist(RV.getNode());
8801 
8802   // Unfortunately, RV is now NaN if the input was exactly 0.
8803   // Select out this case and force the answer to 0.
8804   SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
8805   EVT CCVT = getSetCCResultType(VT);
8806   SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
8807   AddToWorklist(ZeroCmp.getNode());
8808   AddToWorklist(RV.getNode());
8809 
8810   return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
8811                      ZeroCmp, Zero, RV);
8812 }
8813 
8814 /// copysign(x, fp_extend(y)) -> copysign(x, y)
8815 /// copysign(x, fp_round(y)) -> copysign(x, y)
8816 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
8817   SDValue N1 = N->getOperand(1);
8818   if ((N1.getOpcode() == ISD::FP_EXTEND ||
8819        N1.getOpcode() == ISD::FP_ROUND)) {
8820     // Do not optimize out type conversion of f128 type yet.
8821     // For some targets like x86_64, configuration is changed to keep one f128
8822     // value in one SSE register, but instruction selection cannot handle
8823     // FCOPYSIGN on SSE registers yet.
8824     EVT N1VT = N1->getValueType(0);
8825     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
8826     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
8827   }
8828   return false;
8829 }
8830 
8831 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
8832   SDValue N0 = N->getOperand(0);
8833   SDValue N1 = N->getOperand(1);
8834   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8835   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8836   EVT VT = N->getValueType(0);
8837 
8838   if (N0CFP && N1CFP)  // Constant fold
8839     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
8840 
8841   if (N1CFP) {
8842     const APFloat& V = N1CFP->getValueAPF();
8843     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
8844     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
8845     if (!V.isNegative()) {
8846       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
8847         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
8848     } else {
8849       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8850         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
8851                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
8852     }
8853   }
8854 
8855   // copysign(fabs(x), y) -> copysign(x, y)
8856   // copysign(fneg(x), y) -> copysign(x, y)
8857   // copysign(copysign(x,z), y) -> copysign(x, y)
8858   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
8859       N0.getOpcode() == ISD::FCOPYSIGN)
8860     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
8861                        N0.getOperand(0), N1);
8862 
8863   // copysign(x, abs(y)) -> abs(x)
8864   if (N1.getOpcode() == ISD::FABS)
8865     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
8866 
8867   // copysign(x, copysign(y,z)) -> copysign(x, z)
8868   if (N1.getOpcode() == ISD::FCOPYSIGN)
8869     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
8870                        N0, N1.getOperand(1));
8871 
8872   // copysign(x, fp_extend(y)) -> copysign(x, y)
8873   // copysign(x, fp_round(y)) -> copysign(x, y)
8874   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
8875     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
8876                        N0, N1.getOperand(0));
8877 
8878   return SDValue();
8879 }
8880 
/// Combine a SINT_TO_FP node: constant folding, conversion to UINT_TO_FP
/// when the sign bit is known zero, and select_cc formation for boolean
/// (setcc-derived) inputs.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    // (an i1 setcc is 0 or -1 when sign-extended, hence the -1.0 arm).
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    // (zext makes the input 0 or +1, hence the 1.0 arm here).
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
8934 
/// Combine a UINT_TO_FP node: constant folding, conversion to SINT_TO_FP
/// when the sign bit is known zero, and select_cc formation for setcc inputs.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
    // (an unsigned boolean setcc converts to 0.0 or 1.0).

    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
8974 
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Only applies when the input is itself an int->fp conversion.
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // Subtract one bit for the sign when the side is signed: the magnitude
  // range of an N-bit signed value fits in N-1 bits.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range, i.e. the FP format's mantissa
  // precision covers every integer in the relevant range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: sign-extend only when both sides are signed; otherwise a
      // zero-extend is correct (negative signed->unsigned would be UB).
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same width: a bitcast suffices (it is a no-op for same-typed ints).
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
9016 
9017 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
9018   SDValue N0 = N->getOperand(0);
9019   EVT VT = N->getValueType(0);
9020 
9021   // fold (fp_to_sint c1fp) -> c1
9022   if (isConstantFPBuildVectorOrConstantFP(N0))
9023     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
9024 
9025   return FoldIntToFPToInt(N, DAG);
9026 }
9027 
9028 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
9029   SDValue N0 = N->getOperand(0);
9030   EVT VT = N->getValueType(0);
9031 
9032   // fold (fp_to_uint c1fp) -> c1
9033   if (isConstantFPBuildVectorOrConstantFP(N0))
9034     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
9035 
9036   return FoldIntToFPToInt(N, DAG);
9037 }
9038 
/// Combine an FP_ROUND node. Operand 1 is the "trunc" flag: 1 means the
/// round is known to be value-preserving.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The merged round is value-preserving only if both inputs were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only the magnitude operand is safe: the sign is unaffected.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}
9091 
9092 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
9093   SDValue N0 = N->getOperand(0);
9094   EVT VT = N->getValueType(0);
9095   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9096   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9097 
9098   // fold (fp_round_inreg c1fp) -> c1fp
9099   if (N0CFP && isTypeLegal(EVT)) {
9100     SDLoc DL(N);
9101     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
9102     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
9103   }
9104 
9105   return SDValue();
9106 }
9107 
/// Combine an FP_EXTEND node: constant folding, fp16 conversion merging,
/// folding away value-preserving rounds, and extending loads in place.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // (fp16_to_fp already produces the wider type directly).
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X (trunc flag == 1 marks the round as value-preserving).
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  // The extload yields the wide value for this node; other users of the
  // original narrow load are redirected to a round of the extload, and its
  // chain users to the extload's chain.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}
9157 
9158 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
9159   SDValue N0 = N->getOperand(0);
9160   EVT VT = N->getValueType(0);
9161 
9162   // fold (fceil c1) -> fceil(c1)
9163   if (isConstantFPBuildVectorOrConstantFP(N0))
9164     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
9165 
9166   return SDValue();
9167 }
9168 
9169 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
9170   SDValue N0 = N->getOperand(0);
9171   EVT VT = N->getValueType(0);
9172 
9173   // fold (ftrunc c1) -> ftrunc(c1)
9174   if (isConstantFPBuildVectorOrConstantFP(N0))
9175     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
9176 
9177   return SDValue();
9178 }
9179 
9180 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
9181   SDValue N0 = N->getOperand(0);
9182   EVT VT = N->getValueType(0);
9183 
9184   // fold (ffloor c1) -> ffloor(c1)
9185   if (isConstantFPBuildVectorOrConstantFP(N0))
9186     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
9187 
9188   return SDValue();
9189 }
9190 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an FNEG node: constant folding, free-negation rewriting,
/// sign-bit XOR through bitcasts, and negating the constant of an FMUL.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can absorb the negation at no cost (e.g. it is itself an
  // FNEG or a negatable expression), rewrite it instead of emitting FNEG.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only fold after legalization, and only when the negated constant is
      // representable as a legal FP immediate or ConstantFP is legal.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                       N0.getOperand(1)),
                           // Preserve the FMUL's fast-math flags.
                           &cast<BinaryWithFlagsSDNode>(N0)->Flags);
    }
  }

  return SDValue();
}
9249 
9250 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
9251   SDValue N0 = N->getOperand(0);
9252   SDValue N1 = N->getOperand(1);
9253   EVT VT = N->getValueType(0);
9254   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9255   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9256 
9257   if (N0CFP && N1CFP) {
9258     const APFloat &C0 = N0CFP->getValueAPF();
9259     const APFloat &C1 = N1CFP->getValueAPF();
9260     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
9261   }
9262 
9263   // Canonicalize to constant on RHS.
9264   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9265      !isConstantFPBuildVectorOrConstantFP(N1))
9266     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
9267 
9268   return SDValue();
9269 }
9270 
9271 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
9272   SDValue N0 = N->getOperand(0);
9273   SDValue N1 = N->getOperand(1);
9274   EVT VT = N->getValueType(0);
9275   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9276   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9277 
9278   if (N0CFP && N1CFP) {
9279     const APFloat &C0 = N0CFP->getValueAPF();
9280     const APFloat &C1 = N1CFP->getValueAPF();
9281     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
9282   }
9283 
9284   // Canonicalize to constant on RHS.
9285   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9286      !isConstantFPBuildVectorOrConstantFP(N1))
9287     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
9288 
9289   return SDValue();
9290 }
9291 
/// Combine an FABS node: constant folding, idempotence, absorbing
/// sign-changing operands, and sign-bit masking through bitcasts.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  // (fabs discards the sign, so any sign-only modification can be dropped).
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
    }
  }

  return SDValue();
}
9337 
9338 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
9339   SDValue Chain = N->getOperand(0);
9340   SDValue N1 = N->getOperand(1);
9341   SDValue N2 = N->getOperand(2);
9342 
9343   // If N is a constant we could fold this into a fallthrough or unconditional
9344   // branch. However that doesn't happen very often in normal code, because
9345   // Instcombine/SimplifyCFG should have handled the available opportunities.
9346   // If we did this folding here, it would be necessary to update the
9347   // MachineBasicBlock CFG, which is awkward.
9348 
9349   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
9350   // on the target.
9351   if (N1.getOpcode() == ISD::SETCC &&
9352       TLI.isOperationLegalOrCustom(ISD::BR_CC,
9353                                    N1.getOperand(0).getValueType())) {
9354     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9355                        Chain, N1.getOperand(2),
9356                        N1.getOperand(0), N1.getOperand(1), N2);
9357   }
9358 
9359   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
9360       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
9361        (N1.getOperand(0).hasOneUse() &&
9362         N1.getOperand(0).getOpcode() == ISD::SRL))) {
9363     SDNode *Trunc = nullptr;
9364     if (N1.getOpcode() == ISD::TRUNCATE) {
9365       // Look pass the truncate.
9366       Trunc = N1.getNode();
9367       N1 = N1.getOperand(0);
9368     }
9369 
9370     // Match this pattern so that we can generate simpler code:
9371     //
9372     //   %a = ...
9373     //   %b = and i32 %a, 2
9374     //   %c = srl i32 %b, 1
9375     //   brcond i32 %c ...
9376     //
9377     // into
9378     //
9379     //   %a = ...
9380     //   %b = and i32 %a, 2
9381     //   %c = setcc eq %b, 0
9382     //   brcond %c ...
9383     //
9384     // This applies only when the AND constant value has one bit set and the
9385     // SRL constant is equal to the log2 of the AND constant. The back-end is
9386     // smart enough to convert the result into a TEST/JMP sequence.
9387     SDValue Op0 = N1.getOperand(0);
9388     SDValue Op1 = N1.getOperand(1);
9389 
9390     if (Op0.getOpcode() == ISD::AND &&
9391         Op1.getOpcode() == ISD::Constant) {
9392       SDValue AndOp1 = Op0.getOperand(1);
9393 
9394       if (AndOp1.getOpcode() == ISD::Constant) {
9395         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
9396 
9397         if (AndConst.isPowerOf2() &&
9398             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
9399           SDLoc DL(N);
9400           SDValue SetCC =
9401             DAG.getSetCC(DL,
9402                          getSetCCResultType(Op0.getValueType()),
9403                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
9404                          ISD::SETNE);
9405 
9406           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
9407                                           MVT::Other, Chain, SetCC, N2);
9408           // Don't add the new BRCond into the worklist or else SimplifySelectCC
9409           // will convert it back to (X & C1) >> C2.
9410           CombineTo(N, NewBRCond, false);
9411           // Truncate is dead.
9412           if (Trunc)
9413             deleteAndRecombine(Trunc);
9414           // Replace the uses of SRL with SETCC
9415           WorklistRemover DeadNodes(*this);
9416           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9417           deleteAndRecombine(N1.getNode());
9418           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9419         }
9420       }
9421     }
9422 
9423     if (Trunc)
9424       // Restore N1 if the above transformation doesn't match.
9425       N1 = N->getOperand(1);
9426   }
9427 
9428   // Transform br(xor(x, y)) -> br(x != y)
9429   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
9430   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
9431     SDNode *TheXor = N1.getNode();
9432     SDValue Op0 = TheXor->getOperand(0);
9433     SDValue Op1 = TheXor->getOperand(1);
9434     if (Op0.getOpcode() == Op1.getOpcode()) {
9435       // Avoid missing important xor optimizations.
9436       if (SDValue Tmp = visitXOR(TheXor)) {
9437         if (Tmp.getNode() != TheXor) {
9438           DEBUG(dbgs() << "\nReplacing.8 ";
9439                 TheXor->dump(&DAG);
9440                 dbgs() << "\nWith: ";
9441                 Tmp.getNode()->dump(&DAG);
9442                 dbgs() << '\n');
9443           WorklistRemover DeadNodes(*this);
9444           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
9445           deleteAndRecombine(TheXor);
9446           return DAG.getNode(ISD::BRCOND, SDLoc(N),
9447                              MVT::Other, Chain, Tmp, N2);
9448         }
9449 
9450         // visitXOR has changed XOR's operands or replaced the XOR completely,
9451         // bail out.
9452         return SDValue(N, 0);
9453       }
9454     }
9455 
9456     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
9457       bool Equal = false;
9458       if (isOneConstant(Op0) && Op0.hasOneUse() &&
9459           Op0.getOpcode() == ISD::XOR) {
9460         TheXor = Op0.getNode();
9461         Equal = true;
9462       }
9463 
9464       EVT SetCCVT = N1.getValueType();
9465       if (LegalTypes)
9466         SetCCVT = getSetCCResultType(SetCCVT);
9467       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
9468                                    SetCCVT,
9469                                    Op0, Op1,
9470                                    Equal ? ISD::SETEQ : ISD::SETNE);
9471       // Replace the uses of XOR with SETCC
9472       WorklistRemover DeadNodes(*this);
9473       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9474       deleteAndRecombine(N1.getNode());
9475       return DAG.getNode(ISD::BRCOND, SDLoc(N),
9476                          MVT::Other, Chain, SetCC, N2);
9477     }
9478   }
9479 
9480   return SDValue();
9481 }
9482 
9483 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
9484 //
9485 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
9486   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
9487   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
9488 
9489   // If N is a constant we could fold this into a fallthrough or unconditional
9490   // branch. However that doesn't happen very often in normal code, because
9491   // Instcombine/SimplifyCFG should have handled the available opportunities.
9492   // If we did this folding here, it would be necessary to update the
9493   // MachineBasicBlock CFG, which is awkward.
9494 
9495   // Use SimplifySetCC to simplify SETCC's.
9496   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
9497                                CondLHS, CondRHS, CC->get(), SDLoc(N),
9498                                false);
9499   if (Simp.getNode()) AddToWorklist(Simp.getNode());
9500 
9501   // fold to a simpler setcc
9502   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
9503     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9504                        N->getOperand(0), Simp.getOperand(2),
9505                        Simp.getOperand(0), Simp.getOperand(1),
9506                        N->getOperand(4));
9507 
9508   return SDValue();
9509 }
9510 
9511 /// Return true if 'Use' is a load or a store that uses N as its base pointer
9512 /// and that N may be folded in the load / store addressing mode.
9513 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
9514                                     SelectionDAG &DAG,
9515                                     const TargetLowering &TLI) {
9516   EVT VT;
9517   unsigned AS;
9518 
9519   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
9520     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
9521       return false;
9522     VT = LD->getMemoryVT();
9523     AS = LD->getAddressSpace();
9524   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
9525     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
9526       return false;
9527     VT = ST->getMemoryVT();
9528     AS = ST->getAddressSpace();
9529   } else
9530     return false;
9531 
9532   TargetLowering::AddrMode AM;
9533   if (N->getOpcode() == ISD::ADD) {
9534     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9535     if (Offset)
9536       // [reg +/- imm]
9537       AM.BaseOffs = Offset->getSExtValue();
9538     else
9539       // [reg +/- reg]
9540       AM.Scale = 1;
9541   } else if (N->getOpcode() == ISD::SUB) {
9542     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9543     if (Offset)
9544       // [reg +/- imm]
9545       AM.BaseOffs = -Offset->getSExtValue();
9546     else
9547       // [reg +/- reg]
9548       AM.Scale = 1;
9549   } else
9550     return false;
9551 
9552   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
9553                                    VT.getTypeForEVT(*DAG.getContext()), AS);
9554 }
9555 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed forms are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Identify the memory operation, check that the target supports a
  // pre-inc/pre-dec form for its memory type, and grab the base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.  Folding BasePtr into the store would make the store depend on
  // its own result, creating a cycle.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // OtherUses collects add/sub-of-constant users of BasePtr that will be
  // rewritten in terms of the updated pointer after the transformation;
  // any unsuitable user clears the list and disables that rewrite entirely.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (N->hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The other operand of the add/sub must be a constant of the same type
      // as Offset so the two offsets can be combined arithmetically below.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Undo the earlier swap so BasePtr/Offset are in the order the target
  // returned before building the indexed node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    // Check #3: folding Ptr would make N depend on this use -> cycle.
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Check #4: require at least one use that cannot be folded away.
  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Indexed load results are (value, updated base ptr, chain); indexed store
  // results are (updated base ptr, chain). Map N's results accordingly.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-apply the swap so that Offset again names the constant operand used
  // in the offset arithmetic below.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Locate the constant-offset operand; the other operand must be BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expresion that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    APInt Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs: X0/Y0 come from whether the other use is a SUB and which
    // operand holds the constant; X1/Y1 from the indexed node's direction
    // (PRE_DEC) and whether base/offset were swapped earlier.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
9782 
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed forms are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Identify the memory operation, check that the target supports a
  // post-inc/post-dec form for its memory type, and grab the base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the memory op is the pointer's only user there is no add/sub of it to
  // fold in.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look among Ptr's users for an add/sub that the target can turn into a
  // post-indexed address for N.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          // This add/sub is a "real" use only if some consumer cannot fold
          // it directly into a load/store addressing mode.
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      // Not profitable with this candidate; try the next user of Ptr.
      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Indexed load results are (value, updated base ptr, chain); indexed
        // store results are (updated base ptr, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
9902 
9903 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
9904 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
9905   ISD::MemIndexedMode AM = LD->getAddressingMode();
9906   assert(AM != ISD::UNINDEXED);
9907   SDValue BP = LD->getOperand(1);
9908   SDValue Inc = LD->getOperand(2);
9909 
9910   // Some backends use TargetConstants for load offsets, but don't expect
9911   // TargetConstants in general ADD nodes. We can convert these constants into
9912   // regular Constants (if the constant is not opaque).
9913   assert((Inc.getOpcode() != ISD::TargetConstant ||
9914           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
9915          "Cannot split out indexing using opaque target constants");
9916   if (Inc.getOpcode() == ISD::TargetConstant) {
9917     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
9918     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
9919                           ConstInc->getValueType(0));
9920   }
9921 
9922   unsigned Opc =
9923       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
9924   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
9925 }
9926 
// Combine a LOAD node: delete dead loads, forward stored values, improve
// alignment, find a better (less constraining) chain, form indexed loads,
// and slice wide loads into narrower ones.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        // The RAUW above may have left N with no users at all; only then is
        // it safe to delete.
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // Dead indexed load: the loaded value is unused, and either the updated
      // pointer is also unused or we can split its computation out as a plain
      // add/sub (MaySplitLoadIndex and no opaque target-constant offset).
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    // The store must be the immediately preceding node on the chain so no
    // intervening operation can clobber the location.
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
      return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        // Rebuild the load with the stronger alignment; all other memory
        // operands are carried over unchanged.
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(), Align, LD->getAAInfo());
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // Decide whether alias analysis may be used to relax the chain; the
  // command-line flag overrides the subtarget default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
10075 
10076 namespace {
10077 /// \brief Helper structure used to slice a load in smaller loads.
10078 /// Basically a slice is obtained from the following sequence:
10079 /// Origin = load Ty1, Base
10080 /// Shift = srl Ty1 Origin, CstTy Amount
10081 /// Inst = trunc Shift to Ty2
10082 ///
10083 /// Then, it will be rewriten into:
10084 /// Slice = load SliceTy, Base + SliceOffset
10085 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
10086 ///
10087 /// SliceTy is deduced from the number of bits that are actually used to
10088 /// build Inst.
10089 struct LoadedSlice {
10090   /// \brief Helper structure used to compute the cost of a slice.
10091   struct Cost {
10092     /// Are we optimizing for code size.
10093     bool ForCodeSize;
10094     /// Various cost.
10095     unsigned Loads;
10096     unsigned Truncates;
10097     unsigned CrossRegisterBanksCopies;
10098     unsigned ZExts;
10099     unsigned Shift;
10100 
10101     Cost(bool ForCodeSize = false)
10102         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
10103           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
10104 
10105     /// \brief Get the cost of one isolated slice.
10106     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
10107         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
10108           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
10109       EVT TruncType = LS.Inst->getValueType(0);
10110       EVT LoadedType = LS.getLoadedType();
10111       if (TruncType != LoadedType &&
10112           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
10113         ZExts = 1;
10114     }
10115 
10116     /// \brief Account for slicing gain in the current cost.
10117     /// Slicing provide a few gains like removing a shift or a
10118     /// truncate. This method allows to grow the cost of the original
10119     /// load with the gain from this slice.
10120     void addSliceGain(const LoadedSlice &LS) {
10121       // Each slice saves a truncate.
10122       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
10123       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
10124                               LS.Inst->getValueType(0)))
10125         ++Truncates;
10126       // If there is a shift amount, this slice gets rid of it.
10127       if (LS.Shift)
10128         ++Shift;
10129       // If this slice can merge a cross register bank copy, account for it.
10130       if (LS.canMergeExpensiveCrossRegisterBankCopy())
10131         ++CrossRegisterBanksCopies;
10132     }
10133 
10134     Cost &operator+=(const Cost &RHS) {
10135       Loads += RHS.Loads;
10136       Truncates += RHS.Truncates;
10137       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
10138       ZExts += RHS.ZExts;
10139       Shift += RHS.Shift;
10140       return *this;
10141     }
10142 
10143     bool operator==(const Cost &RHS) const {
10144       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
10145              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
10146              ZExts == RHS.ZExts && Shift == RHS.Shift;
10147     }
10148 
10149     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
10150 
10151     bool operator<(const Cost &RHS) const {
10152       // Assume cross register banks copies are as expensive as loads.
10153       // FIXME: Do we want some more target hooks?
10154       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
10155       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
10156       // Unless we are optimizing for code size, consider the
10157       // expensive operation first.
10158       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
10159         return ExpensiveOpsLHS < ExpensiveOpsRHS;
10160       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
10161              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
10162     }
10163 
10164     bool operator>(const Cost &RHS) const { return RHS < *this; }
10165 
10166     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
10167 
10168     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
10169   };
  // The last instruction that represents the slice. Per the slicing
  // analysis (see SliceUpLoad) this should be a truncate instruction.
  SDNode *Inst;
  // The original load instruction this slice is extracted from.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load. Must be a
  // multiple of 8 for the slice to be loadable (asserted in
  // getOffsetFromBase and checked by the caller).
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  // Default arguments produce an "invalid" slice; isLegal() rejects any
  // slice with a null Inst, Origin, or DAG.
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
10184 
10185   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
10186   /// \return Result is \p BitWidth and has used bits set to 1 and
10187   ///         not used bits set to 0.
10188   APInt getUsedBits() const {
10189     // Reproduce the trunc(lshr) sequence:
10190     // - Start from the truncated value.
10191     // - Zero extend to the desired bit width.
10192     // - Shift left.
10193     assert(Origin && "No original load to compare against.");
10194     unsigned BitWidth = Origin->getValueSizeInBits(0);
10195     assert(Inst && "This slice is not bound to an instruction");
10196     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
10197            "Extracted slice is bigger than the whole type!");
10198     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
10199     UsedBits.setAllBits();
10200     UsedBits = UsedBits.zext(BitWidth);
10201     UsedBits <<= Shift;
10202     return UsedBits;
10203   }
10204 
10205   /// \brief Get the size of the slice to be loaded in bytes.
10206   unsigned getLoadedSize() const {
10207     unsigned SliceSize = getUsedBits().countPopulation();
10208     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
10209     return SliceSize / 8;
10210   }
10211 
10212   /// \brief Get the type that will be loaded for this slice.
10213   /// Note: This may not be the final type for the slice.
10214   EVT getLoadedType() const {
10215     assert(DAG && "Missing context");
10216     LLVMContext &Ctxt = *DAG->getContext();
10217     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
10218   }
10219 
  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // The base pointer is only known to be Alignment-aligned, so the
    // alignment of (base + Offset) is the largest power of two dividing
    // both Alignment and Alignment + Offset.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }
10228 
10229   /// \brief Check if this slice can be rewritten with legal operations.
10230   bool isLegal() const {
10231     // An invalid slice is not legal.
10232     if (!Origin || !Inst || !DAG)
10233       return false;
10234 
10235     // Offsets are for indexed load only, we do not handle that.
10236     if (Origin->getOffset().getOpcode() != ISD::UNDEF)
10237       return false;
10238 
10239     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10240 
10241     // Check that the type is legal.
10242     EVT SliceType = getLoadedType();
10243     if (!TLI.isTypeLegal(SliceType))
10244       return false;
10245 
10246     // Check that the load is legal for this type.
10247     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
10248       return false;
10249 
10250     // Check that the offset can be computed.
10251     // 1. Check its type.
10252     EVT PtrType = Origin->getBasePtr().getValueType();
10253     if (PtrType == MVT::Untyped || PtrType.isExtended())
10254       return false;
10255 
10256     // 2. Check that it fits in the immediate.
10257     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
10258       return false;
10259 
10260     // 3. Check that the computation is legal.
10261     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
10262       return false;
10263 
10264     // Check that the zext is legal if it needs one.
10265     EVT TruncateType = Inst->getValueType(0);
10266     if (TruncateType != SliceType &&
10267         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
10268       return false;
10269 
10270     return true;
10271   }
10272 
10273   /// \brief Get the offset in bytes of this slice in the original chunk of
10274   /// bits.
10275   /// \pre DAG != nullptr.
10276   uint64_t getOffsetFromBase() const {
10277     assert(DAG && "Missing context.");
10278     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
10279     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
10280     uint64_t Offset = Shift / 8;
10281     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
10282     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
10283            "The size of the original loaded type is not a multiple of a"
10284            " byte.");
10285     // If Offset is bigger than TySizeInBytes, it means we are loading all
10286     // zeros. This should have been optimized before in the process.
10287     assert(TySizeInBytes > Offset &&
10288            "Invalid shift amount for given loaded size");
10289     if (IsBigEndian)
10290       Offset = TySizeInBytes - Offset - getLoadedSize();
10291     return Offset;
10292   }
10293 
10294   /// \brief Generate the sequence of instructions to load the slice
10295   /// represented by this object and redirect the uses of this slice to
10296   /// this new sequence of instructions.
10297   /// \pre this->Inst && this->Origin are valid Instructions and this
10298   /// object passed the legal check: LoadedSlice::isLegal returned true.
10299   /// \return The last instruction of the sequence used to load the slice.
10300   SDValue loadSlice() const {
10301     assert(Inst && Origin && "Unable to replace a non-existing slice.");
10302     const SDValue &OldBaseAddr = Origin->getBasePtr();
10303     SDValue BaseAddr = OldBaseAddr;
10304     // Get the offset in that chunk of bytes w.r.t. the endianess.
10305     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
10306     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
10307     if (Offset) {
10308       // BaseAddr = BaseAddr + Offset.
10309       EVT ArithType = BaseAddr.getValueType();
10310       SDLoc DL(Origin);
10311       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
10312                               DAG->getConstant(Offset, DL, ArithType));
10313     }
10314 
10315     // Create the type of the loaded slice according to its size.
10316     EVT SliceType = getLoadedType();
10317 
10318     // Create the load for the slice.
10319     SDValue LastInst = DAG->getLoad(
10320         SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
10321         Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
10322         Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
10323     // If the final type is not the same as the loaded type, this means that
10324     // we have to pad with zero. Create a zero extend for that.
10325     EVT FinalType = Inst->getValueType(0);
10326     if (SliceType != FinalType)
10327       LastInst =
10328           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
10329     return LastInst;
10330   }
10331 
10332   /// \brief Check if this slice can be merged with an expensive cross register
10333   /// bank copy. E.g.,
10334   /// i = load i32
10335   /// f = bitcast i32 i to float
10336   bool canMergeExpensiveCrossRegisterBankCopy() const {
10337     if (!Inst || !Inst->hasOneUse())
10338       return false;
10339     SDNode *Use = *Inst->use_begin();
10340     if (Use->getOpcode() != ISD::BITCAST)
10341       return false;
10342     assert(DAG && "Missing context");
10343     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10344     EVT ResVT = Use->getValueType(0);
10345     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
10346     const TargetRegisterClass *ArgRC =
10347         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
10348     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
10349       return false;
10350 
10351     // At this point, we know that we perform a cross-register-bank copy.
10352     // Check if it is expensive.
10353     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
10354     // Assume bitcasts are cheap, unless both register classes do not
10355     // explicitly share a common sub class.
10356     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
10357       return false;
10358 
10359     // Check if it will be merged with the load.
10360     // 1. Check the alignment constraint.
10361     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
10362         ResVT.getTypeForEVT(*DAG->getContext()));
10363 
10364     if (RequiredAlignment > getAlignment())
10365       return false;
10366 
10367     // 2. Check that the load is a legal operation for that type.
10368     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
10369       return false;
10370 
10371     // 3. Check that we do not have a zext in the way.
10372     if (Inst->getValueType(0) != getLoadedType())
10373       return false;
10374 
10375     return true;
10376   }
10377 };
10378 }
10379 
10380 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
10381 /// \p UsedBits looks like 0..0 1..1 0..0.
10382 static bool areUsedBitsDense(const APInt &UsedBits) {
10383   // If all the bits are one, this is dense!
10384   if (UsedBits.isAllOnesValue())
10385     return true;
10386 
10387   // Get rid of the unused bits on the right.
10388   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
10389   // Get rid of the unused bits on the left.
10390   if (NarrowedUsedBits.countLeadingZeros())
10391     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
10392   // Check that the chunk of bits is completely used.
10393   return NarrowedUsedBits.isAllOnesValue();
10394 }
10395 
10396 /// \brief Check whether or not \p First and \p Second are next to each other
10397 /// in memory. This means that there is no hole between the bits loaded
10398 /// by \p First and the bits loaded by \p Second.
10399 static bool areSlicesNextToEachOther(const LoadedSlice &First,
10400                                      const LoadedSlice &Second) {
10401   assert(First.Origin == Second.Origin && First.Origin &&
10402          "Unable to match different memory origins.");
10403   APInt UsedBits = First.getUsedBits();
10404   assert((UsedBits & Second.getUsedBits()) == 0 &&
10405          "Slices are not supposed to overlap.");
10406   UsedBits |= Second.getUsedBits();
10407   return areUsedBitsDense(UsedBits);
10408 }
10409 
/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Note: the comma expression in the increment runs "First = Second" on
  // every path to the next iteration, including the continue statements
  // below. Restructuring this loop would change which slice starts the
  // next candidate pair.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {

    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    // Pairing two loads effectively removes one load from the total cost.
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
10471 
10472 /// \brief Check the profitability of all involved LoadedSlice.
10473 /// Currently, it is considered profitable if there is exactly two
10474 /// involved slices (1) which are (2) next to each other in memory, and
10475 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
10476 ///
10477 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
10478 /// the elements themselves.
10479 ///
10480 /// FIXME: When the cost model will be mature enough, we can relax
10481 /// constraints (1) and (2).
10482 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10483                                 const APInt &UsedBits, bool ForCodeSize) {
10484   unsigned NumberOfSlices = LoadedSlices.size();
10485   if (StressLoadSlicing)
10486     return NumberOfSlices > 1;
10487 
10488   // Check (1).
10489   if (NumberOfSlices != 2)
10490     return false;
10491 
10492   // Check (2).
10493   if (!areUsedBitsDense(UsedBits))
10494     return false;
10495 
10496   // Check (3).
10497   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
10498   // The original code has one big load.
10499   OrigCost.Loads = 1;
10500   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
10501     const LoadedSlice &LS = LoadedSlices[CurrSlice];
10502     // Accumulate the cost of all the slices.
10503     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
10504     GlobalSlicingCost += SliceCost;
10505 
10506     // Account as cost in the original configuration the gain obtained
10507     // with the current slices.
10508     OrigCost.addSliceGain(LS);
10509   }
10510 
10511   // If the target supports paired load, adjust the cost accordingly.
10512   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
10513   return OrigCost > GlobalSlicingCost;
10514 }
10515 
10516 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
10517 /// operations, split it in the various pieces being extracted.
10518 ///
10519 /// This sort of thing is introduced by SROA.
10520 /// This slicing takes care not to insert overlapping loads.
10521 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
10522 bool DAGCombiner::SliceUpLoad(SDNode *N) {
10523   if (Level < AfterLegalizeDAG)
10524     return false;
10525 
10526   LoadSDNode *LD = cast<LoadSDNode>(N);
10527   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
10528       !LD->getValueType(0).isInteger())
10529     return false;
10530 
10531   // Keep track of already used bits to detect overlapping values.
10532   // In that case, we will just abort the transformation.
10533   APInt UsedBits(LD->getValueSizeInBits(0), 0);
10534 
10535   SmallVector<LoadedSlice, 4> LoadedSlices;
10536 
10537   // Check if this load is used as several smaller chunks of bits.
10538   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
10539   // of computation for each trunc.
10540   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
10541        UI != UIEnd; ++UI) {
10542     // Skip the uses of the chain.
10543     if (UI.getUse().getResNo() != 0)
10544       continue;
10545 
10546     SDNode *User = *UI;
10547     unsigned Shift = 0;
10548 
10549     // Check if this is a trunc(lshr).
10550     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
10551         isa<ConstantSDNode>(User->getOperand(1))) {
10552       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
10553       User = *User->use_begin();
10554     }
10555 
10556     // At this point, User is a Truncate, iff we encountered, trunc or
10557     // trunc(lshr).
10558     if (User->getOpcode() != ISD::TRUNCATE)
10559       return false;
10560 
10561     // The width of the type must be a power of 2 and greater than 8-bits.
10562     // Otherwise the load cannot be represented in LLVM IR.
10563     // Moreover, if we shifted with a non-8-bits multiple, the slice
10564     // will be across several bytes. We do not support that.
10565     unsigned Width = User->getValueSizeInBits(0);
10566     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
10567       return 0;
10568 
10569     // Build the slice for this chain of computations.
10570     LoadedSlice LS(User, LD, Shift, &DAG);
10571     APInt CurrentUsedBits = LS.getUsedBits();
10572 
10573     // Check if this slice overlaps with another.
10574     if ((CurrentUsedBits & UsedBits) != 0)
10575       return false;
10576     // Update the bits used globally.
10577     UsedBits |= CurrentUsedBits;
10578 
10579     // Check if the new slice would be legal.
10580     if (!LS.isLegal())
10581       return false;
10582 
10583     // Record the slice.
10584     LoadedSlices.push_back(LS);
10585   }
10586 
10587   // Abort slicing if it does not seem to be profitable.
10588   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
10589     return false;
10590 
10591   ++SlicedLoads;
10592 
10593   // Rewrite each chain to use an independent load.
10594   // By construction, each chain can be represented by a unique load.
10595 
10596   // Prepare the argument for the new token factor for all the slices.
10597   SmallVector<SDValue, 8> ArgChains;
10598   for (SmallVectorImpl<LoadedSlice>::const_iterator
10599            LSIt = LoadedSlices.begin(),
10600            LSItEnd = LoadedSlices.end();
10601        LSIt != LSItEnd; ++LSIt) {
10602     SDValue SliceInst = LSIt->loadSlice();
10603     CombineTo(LSIt->Inst, SliceInst, true);
10604     if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
10605       SliceInst = SliceInst.getOperand(0);
10606     assert(SliceInst->getOpcode() == ISD::LOAD &&
10607            "It takes more than a zext to get to the loaded slice!!");
10608     ArgChains.push_back(SliceInst.getValue(1));
10609   }
10610 
10611   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
10612                               ArgChains);
10613   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10614   return true;
10615 }
10616 
10617 /// Check to see if V is (and load (ptr), imm), where the load is having
10618 /// specific bytes cleared out.  If so, return the byte size being masked out
10619 /// and the shift amount.
10620 static std::pair<unsigned, unsigned>
10621 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
10622   std::pair<unsigned, unsigned> Result(0, 0);
10623 
10624   // Check for the structure we're looking for.
10625   if (V->getOpcode() != ISD::AND ||
10626       !isa<ConstantSDNode>(V->getOperand(1)) ||
10627       !ISD::isNormalLoad(V->getOperand(0).getNode()))
10628     return Result;
10629 
10630   // Check the chain and pointer.
10631   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
10632   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
10633 
10634   // The store should be chained directly to the load or be an operand of a
10635   // tokenfactor.
10636   if (LD == Chain.getNode())
10637     ; // ok.
10638   else if (Chain->getOpcode() != ISD::TokenFactor)
10639     return Result; // Fail.
10640   else {
10641     bool isOk = false;
10642     for (const SDValue &ChainOp : Chain->op_values())
10643       if (ChainOp.getNode() == LD) {
10644         isOk = true;
10645         break;
10646       }
10647     if (!isOk) return Result;
10648   }
10649 
10650   // This only handles simple types.
10651   if (V.getValueType() != MVT::i16 &&
10652       V.getValueType() != MVT::i32 &&
10653       V.getValueType() != MVT::i64)
10654     return Result;
10655 
10656   // Check the constant mask.  Invert it so that the bits being masked out are
10657   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
10658   // follow the sign bit for uniformity.
10659   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
10660   unsigned NotMaskLZ = countLeadingZeros(NotMask);
10661   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
10662   unsigned NotMaskTZ = countTrailingZeros(NotMask);
10663   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
10664   if (NotMaskLZ == 64) return Result;  // All zero mask.
10665 
10666   // See if we have a continuous run of bits.  If so, we have 0*1+0*
10667   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
10668     return Result;
10669 
10670   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
10671   if (V.getValueType() != MVT::i64 && NotMaskLZ)
10672     NotMaskLZ -= 64-V.getValueSizeInBits();
10673 
10674   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
10675   switch (MaskedBytes) {
10676   case 1:
10677   case 2:
10678   case 4: break;
10679   default: return Result; // All one mask, or 5-byte mask.
10680   }
10681 
10682   // Verify that the first bit starts at a multiple of mask so that the access
10683   // is aligned the same as the access width.
10684   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
10685 
10686   Result.first = MaskedBytes;
10687   Result.second = NotMaskTZ/8;
10688   return Result;
10689 }
10690 
10691 
10692 /// Check to see if IVal is something that provides a value as specified by
10693 /// MaskInfo. If so, replace the specified store with a narrower store of
10694 /// truncated IVal.
10695 static SDNode *
10696 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
10697                                 SDValue IVal, StoreSDNode *St,
10698                                 DAGCombiner *DC) {
10699   unsigned NumBytes = MaskInfo.first;
10700   unsigned ByteShift = MaskInfo.second;
10701   SelectionDAG &DAG = DC->getDAG();
10702 
10703   // Check to see if IVal is all zeros in the part being masked in by the 'or'
10704   // that uses this.  If not, this is not a replacement.
10705   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
10706                                   ByteShift*8, (ByteShift+NumBytes)*8);
10707   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
10708 
10709   // Check that it is legal on the target to do this.  It is legal if the new
10710   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
10711   // legalization.
10712   MVT VT = MVT::getIntegerVT(NumBytes*8);
10713   if (!DC->isTypeLegal(VT))
10714     return nullptr;
10715 
10716   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
10717   // shifted by ByteShift and truncated down to NumBytes.
10718   if (ByteShift) {
10719     SDLoc DL(IVal);
10720     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
10721                        DAG.getConstant(ByteShift*8, DL,
10722                                     DC->getShiftAmountTy(IVal.getValueType())));
10723   }
10724 
10725   // Figure out the offset for the store and the alignment of the access.
10726   unsigned StOffset;
10727   unsigned NewAlign = St->getAlignment();
10728 
10729   if (DAG.getDataLayout().isLittleEndian())
10730     StOffset = ByteShift;
10731   else
10732     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
10733 
10734   SDValue Ptr = St->getBasePtr();
10735   if (StOffset) {
10736     SDLoc DL(IVal);
10737     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
10738                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
10739     NewAlign = MinAlign(NewAlign, StOffset);
10740   }
10741 
10742   // Truncate down to the new size.
10743   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
10744 
10745   ++OpsNarrowed;
10746   return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
10747                       St->getPointerInfo().getWithOffset(StOffset),
10748                       false, false, NewAlign).getNode();
10749 }
10750 
10751 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain stores of scalar integers whose value has no other users are
  // handled (otherwise the wide op is needed anyway).
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // The generic narrowing below only handles or/xor/and with a constant RHS.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The op must consume a normal load of the stored address, chained
  // directly to this store, in the same address space.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the immediate so that, for every opcode, the set bits
    // of Imm are the bits the operation may change.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // No bits changed, or all bits changed: nothing worth narrowing.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // [ShAmt, MSB] is the bit range actually affected by the operation.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed when every changed bit lies inside the NewBW-wide window
    // starting at ShAmt.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the earlier inversion for AND so the stored immediate is the
      // real mask in the narrow width.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // The narrowed access must still satisfy the new type's ABI alignment.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect users of the old load's chain to the new narrow load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
10880 
10881 /// For a given floating point load / store pair, if the load value isn't used
10882 /// by any other operations, then consider transforming the pair to integer
10883 /// load / store operations if the target deems the transformation profitable.
10884 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
10885   StoreSDNode *ST  = cast<StoreSDNode>(N);
10886   SDValue Chain = ST->getChain();
10887   SDValue Value = ST->getValue();
10888   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
10889       Value.hasOneUse() &&
10890       Chain == SDValue(Value.getNode(), 1)) {
10891     LoadSDNode *LD = cast<LoadSDNode>(Value);
10892     EVT VT = LD->getMemoryVT();
10893     if (!VT.isFloatingPoint() ||
10894         VT != ST->getMemoryVT() ||
10895         LD->isNonTemporal() ||
10896         ST->isNonTemporal() ||
10897         LD->getPointerInfo().getAddrSpace() != 0 ||
10898         ST->getPointerInfo().getAddrSpace() != 0)
10899       return SDValue();
10900 
10901     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10902     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
10903         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
10904         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
10905         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
10906       return SDValue();
10907 
10908     unsigned LDAlign = LD->getAlignment();
10909     unsigned STAlign = ST->getAlignment();
10910     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
10911     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
10912     if (LDAlign < ABIAlign || STAlign < ABIAlign)
10913       return SDValue();
10914 
10915     SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
10916                                 LD->getChain(), LD->getBasePtr(),
10917                                 LD->getPointerInfo(),
10918                                 false, false, false, LDAlign);
10919 
10920     SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
10921                                  NewLD, ST->getBasePtr(),
10922                                  ST->getPointerInfo(),
10923                                  false, false, STAlign);
10924 
10925     AddToWorklist(NewLD.getNode());
10926     AddToWorklist(NewST.getNode());
10927     WorklistRemover DeadNodes(*this);
10928     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
10929     ++LdStFP2Int;
10930     return NewST;
10931   }
10932 
10933   return SDValue();
10934 }
10935 
10936 namespace {
10937 /// Helper struct to parse and store a memory address as base + index + offset.
10938 /// We ignore sign extensions when it is safe to do so.
10939 /// The following two expressions are not equivalent. To differentiate we need
10940 /// to store whether there was a sign extension involved in the index
10941 /// computation.
10942 ///  (load (i64 add (i64 copyfromreg %c)
10943 ///                 (i64 signextend (add (i8 load %index)
10944 ///                                      (i8 1))))
10945 /// vs
10946 ///
10947 /// (load (i64 add (i64 copyfromreg %c)
10948 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
10949 ///                                         (i32 1)))))
10950 struct BaseIndexOffset {
10951   SDValue Base;
10952   SDValue Index;
10953   int64_t Offset;
10954   bool IsIndexSignExt;
10955 
10956   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
10957 
10958   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
10959                   bool IsIndexSignExt) :
10960     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
10961 
10962   bool equalBaseIndex(const BaseIndexOffset &Other) {
10963     return Other.Base == Base && Other.Index == Index &&
10964       Other.IsIndexSignExt == IsIndexSignExt;
10965   }
10966 
10967   /// Parses tree in Ptr for base, index, offset addresses.
10968   static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
10969     bool IsIndexSignExt = false;
10970 
10971     // Split up a folded GlobalAddress+Offset into its component parts.
10972     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
10973       if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
10974         return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
10975                                                     SDLoc(GA),
10976                                                     GA->getValueType(0),
10977                                                     /*Offset=*/0,
10978                                                     /*isTargetGA=*/false,
10979                                                     GA->getTargetFlags()),
10980                                SDValue(),
10981                                GA->getOffset(),
10982                                IsIndexSignExt);
10983       }
10984 
10985     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
10986     // instruction, then it could be just the BASE or everything else we don't
10987     // know how to handle. Just use Ptr as BASE and give up.
10988     if (Ptr->getOpcode() != ISD::ADD)
10989       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
10990 
10991     // We know that we have at least an ADD instruction. Try to pattern match
10992     // the simple case of BASE + OFFSET.
10993     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
10994       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
10995       return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
10996                               IsIndexSignExt);
10997     }
10998 
10999     // Inside a loop the current BASE pointer is calculated using an ADD and a
11000     // MUL instruction. In this case Ptr is the actual BASE pointer.
11001     // (i64 add (i64 %array_ptr)
11002     //          (i64 mul (i64 %induction_var)
11003     //                   (i64 %element_size)))
11004     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
11005       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11006 
11007     // Look at Base + Index + Offset cases.
11008     SDValue Base = Ptr->getOperand(0);
11009     SDValue IndexOffset = Ptr->getOperand(1);
11010 
11011     // Skip signextends.
11012     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
11013       IndexOffset = IndexOffset->getOperand(0);
11014       IsIndexSignExt = true;
11015     }
11016 
11017     // Either the case of Base + Index (no offset) or something else.
11018     if (IndexOffset->getOpcode() != ISD::ADD)
11019       return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
11020 
11021     // Now we have the case of Base + Index + offset.
11022     SDValue Index = IndexOffset->getOperand(0);
11023     SDValue Offset = IndexOffset->getOperand(1);
11024 
11025     if (!isa<ConstantSDNode>(Offset))
11026       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
11027 
11028     // Ignore signextends.
11029     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
11030       Index = Index->getOperand(0);
11031       IsIndexSignExt = true;
11032     } else IsIndexSignExt = false;
11033 
11034     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
11035     return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
11036   }
11037 };
11038 } // namespace
11039 
11040 // This is a helper function for visitMUL to check the profitability
11041 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
11042 // MulNode is the original multiply, AddNode is (add x, c1),
11043 // and ConstNode is c2.
11044 //
11045 // If the (add x, c1) has multiple uses, we could increase
11046 // the number of adds if we make this transformation.
11047 // It would only be worth doing this if we can remove a
11048 // multiply in the process. Check for that here.
11049 // To illustrate:
11050 //     (A + c1) * c3
11051 //     (A + c2) * c3
11052 // We're checking for cases where we have common "c3 * A" expressions.
11053 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
11054                                               SDValue &AddNode,
11055                                               SDValue &ConstNode) {
11056   APInt Val;
11057 
11058   // If the add only has one use, this would be OK to do.
11059   if (AddNode.getNode()->hasOneUse())
11060     return true;
11061 
11062   // Walk all the users of the constant with which we're multiplying.
11063   for (SDNode *Use : ConstNode->uses()) {
11064 
11065     if (Use == MulNode) // This use is the one we're on right now. Skip it.
11066       continue;
11067 
11068     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
11069       SDNode *OtherOp;
11070       SDNode *MulVar = AddNode.getOperand(0).getNode();
11071 
11072       // OtherOp is what we're multiplying against the constant.
11073       if (Use->getOperand(0) == ConstNode)
11074         OtherOp = Use->getOperand(1).getNode();
11075       else
11076         OtherOp = Use->getOperand(0).getNode();
11077 
11078       // Check to see if multiply is with the same operand of our "add".
11079       //
11080       //     ConstNode  = CONST
11081       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
11082       //     ...
11083       //     AddNode  = (A + c1)  <-- MulVar is A.
11084       //         = AddNode * ConstNode   <-- current visiting instruction.
11085       //
11086       // If we make this transformation, we will have a common
11087       // multiply (ConstNode * A) that we can save.
11088       if (OtherOp == MulVar)
11089         return true;
11090 
11091       // Now check to see if a future expansion will give us a common
11092       // multiply.
11093       //
11094       //     ConstNode  = CONST
11095       //     AddNode    = (A + c1)
11096       //     ...   = AddNode * ConstNode <-- current visiting instruction.
11097       //     ...
11098       //     OtherOp = (A + c2)
11099       //     Use     = OtherOp * ConstNode <-- visiting Use.
11100       //
11101       // If we make this transformation, we will have a common
11102       // multiply (CONST * A) after we also do the same transformation
11103       // to the "t2" instruction.
11104       if (OtherOp->getOpcode() == ISD::ADD &&
11105           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
11106           OtherOp->getOperand(0).getNode() == MulVar)
11107         return true;
11108     }
11109   }
11110 
11111   // Didn't find a case where this would be profitable.
11112   return false;
11113 }
11114 
11115 SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
11116                                                   SDLoc SL,
11117                                                   ArrayRef<MemOpLink> Stores,
11118                                                   SmallVectorImpl<SDValue> &Chains,
11119                                                   EVT Ty) const {
11120   SmallVector<SDValue, 8> BuildVector;
11121 
11122   for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
11123     StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
11124     Chains.push_back(St->getChain());
11125     BuildVector.push_back(St->getValue());
11126   }
11127 
11128   return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
11129 }
11130 
/// Merge the first \p NumStores entries of \p StoreNodes into one wide store.
/// The merged value is either a vector built from the individual stored
/// values (\p UseVector) or, for constant sources, a single wide integer
/// constant.  On success the latest store in the sequence is replaced with
/// the wide store and all other stores in the group are deleted; returns
/// true iff the merge was performed.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
                  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
                  unsigned NumStores, bool IsConstantSrc, bool UseVector) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned LatestNodeUsed = 0;

  for (unsigned i=0; i < NumStores; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    // NOTE(review): this picks the entry with the *smallest* SequenceNum;
    // in the non-AA candidate walk sequence numbers increase toward earlier
    // stores, so smallest = latest — confirm the same holds for the AA path,
    // where Seq follows the chain's use-list order.
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }

  SmallVector<SDValue, 8> Chains;

  // The latest Node in the DAG.
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  SDLoc DL(StoreNodes[0].MemNode);

  SDValue StoredVal;
  if (UseVector) {
    bool IsVec = MemVT.isVector();
    unsigned Elts = NumStores;
    if (IsVec) {
      // When merging vector stores, get the total number of elements.
      Elts *= MemVT.getVectorNumElements();
    }
    // Get the type for the merged vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");

    if (IsConstantSrc) {
      // Constants: gather the stored constants into one BUILD_VECTOR
      // (this also collects the stores' chains into Chains).
      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
    } else {
      // Extracted vector elements: collect the stored values directly.
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
        Chains.push_back(St->getChain());
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
                              DL, Ty, Ops);    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets visit the stores last-to-first so that,
      // after all shifts, the first store's value lands in the
      // least-significant bits of the wide constant.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
      Chains.push_back(St->getChain());

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zext(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        // FP constants contribute their raw bit pattern.
        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  assert(!Chains.empty());

  // Tie all the merged stores' chains together and emit the single wide
  // store at the first (lowest-address) store's location.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
                                  FirstInChain->getBasePtr(),
                                  FirstInChain->getPointerInfo(),
                                  false, false,
                                  FirstInChain->getAlignment());

  // Replace the last store with the new store
  CombineTo(LatestOp, NewStore);
  // Erase all other stores.
  for (unsigned i = 0; i < NumStores; ++i) {
    if (StoreNodes[i].MemNode == LatestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // ReplaceAllUsesWith will replace all uses that existed when it was
    // called, but graph optimizations may cause new ones to appear. For
    // example, the case in pr14333 looks like
    //
    //  St's chain -> St -> another store -> X
    //
    // And the only difference from St to the other store is the chain.
    // When we change it's chain to be St's chain they become identical,
    // get CSEed and the net result is that X is now a use of St.
    // Since we know that St is redundant, just iterate.
    while (!St->use_empty())
      DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }

  return true;
}
11251 
11252 void DAGCombiner::getStoreMergeAndAliasCandidates(
11253     StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
11254     SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
11255   // This holds the base pointer, index, and the offset in bytes from the base
11256   // pointer.
11257   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
11258 
11259   // We must have a base and an offset.
11260   if (!BasePtr.Base.getNode())
11261     return;
11262 
11263   // Do not handle stores to undef base pointers.
11264   if (BasePtr.Base.isUndef())
11265     return;
11266 
11267   // Walk up the chain and look for nodes with offsets from the same
11268   // base pointer. Stop when reaching an instruction with a different kind
11269   // or instruction which has a different base pointer.
11270   EVT MemVT = St->getMemoryVT();
11271   unsigned Seq = 0;
11272   StoreSDNode *Index = St;
11273 
11274 
11275   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11276                                                   : DAG.getSubtarget().useAA();
11277 
11278   if (UseAA) {
11279     // Look at other users of the same chain. Stores on the same chain do not
11280     // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
11281     // to be on the same chain, so don't bother looking at adjacent chains.
11282 
11283     SDValue Chain = St->getChain();
11284     for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
11285       if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
11286         if (I.getOperandNo() != 0)
11287           continue;
11288 
11289         if (OtherST->isVolatile() || OtherST->isIndexed())
11290           continue;
11291 
11292         if (OtherST->getMemoryVT() != MemVT)
11293           continue;
11294 
11295         BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
11296 
11297         if (Ptr.equalBaseIndex(BasePtr))
11298           StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
11299       }
11300     }
11301 
11302     return;
11303   }
11304 
11305   while (Index) {
11306     // If the chain has more than one use, then we can't reorder the mem ops.
11307     if (Index != St && !SDValue(Index, 0)->hasOneUse())
11308       break;
11309 
11310     // Find the base pointer and offset for this memory node.
11311     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
11312 
11313     // Check that the base pointer is the same as the original one.
11314     if (!Ptr.equalBaseIndex(BasePtr))
11315       break;
11316 
11317     // The memory operands must not be volatile.
11318     if (Index->isVolatile() || Index->isIndexed())
11319       break;
11320 
11321     // No truncation.
11322     if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
11323       if (St->isTruncatingStore())
11324         break;
11325 
11326     // The stored memory type must be the same.
11327     if (Index->getMemoryVT() != MemVT)
11328       break;
11329 
11330     // We do not allow under-aligned stores in order to prevent
11331     // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
11332     // be irrelevant here; what MATTERS is that we not move memory
11333     // operations that potentially overlap past each-other.
11334     if (Index->getAlignment() < MemVT.getStoreSize())
11335       break;
11336 
11337     // We found a potential memory operand to merge.
11338     StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
11339 
11340     // Find the next memory operand in the chain. If the next operand in the
11341     // chain is a store then move up and continue the scan with the next
11342     // memory operand. If the next operand is a load save it and use alias
11343     // information to check if it interferes with anything.
11344     SDNode *NextInChain = Index->getChain().getNode();
11345     while (1) {
11346       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
11347         // We found a store node. Use it for the next iteration.
11348         Index = STn;
11349         break;
11350       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
11351         if (Ldn->isVolatile()) {
11352           Index = nullptr;
11353           break;
11354         }
11355 
11356         // Save the load node for later. Continue the scan.
11357         AliasLoadNodes.push_back(Ldn);
11358         NextInChain = Ldn->getChain().getNode();
11359         continue;
11360       } else {
11361         Index = nullptr;
11362         break;
11363       }
11364     }
11365   }
11366 }
11367 
11368 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
11369   if (OptLevel == CodeGenOpt::None)
11370     return false;
11371 
11372   EVT MemVT = St->getMemoryVT();
11373   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11374   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
11375       Attribute::NoImplicitFloat);
11376 
11377   // This function cannot currently deal with non-byte-sized memory sizes.
11378   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
11379     return false;
11380 
11381   if (!MemVT.isSimple())
11382     return false;
11383 
11384   // Perform an early exit check. Do not bother looking at stored values that
11385   // are not constants, loads, or extracted vector elements.
11386   SDValue StoredVal = St->getValue();
11387   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
11388   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
11389                        isa<ConstantFPSDNode>(StoredVal);
11390   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
11391                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
11392 
11393   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
11394     return false;
11395 
11396   // Don't merge vectors into wider vectors if the source data comes from loads.
11397   // TODO: This restriction can be lifted by using logic similar to the
11398   // ExtractVecSrc case.
11399   if (MemVT.isVector() && IsLoadSrc)
11400     return false;
11401 
11402   // Only look at ends of store sequences.
11403   SDValue Chain = SDValue(St, 0);
11404   if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
11405     return false;
11406 
11407   // Save the LoadSDNodes that we find in the chain.
11408   // We need to make sure that these nodes do not interfere with
11409   // any of the store nodes.
11410   SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
11411 
11412   // Save the StoreSDNodes that we find in the chain.
11413   SmallVector<MemOpLink, 8> StoreNodes;
11414 
11415   getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
11416 
11417   // Check if there is anything to merge.
11418   if (StoreNodes.size() < 2)
11419     return false;
11420 
11421   // Sort the memory operands according to their distance from the
11422   // base pointer.  As a secondary criteria: make sure stores coming
11423   // later in the code come first in the list. This is important for
11424   // the non-UseAA case, because we're merging stores into the FINAL
11425   // store along a chain which potentially contains aliasing stores.
11426   // Thus, if there are multiple stores to the same address, the last
11427   // one can be considered for merging but not the others.
11428   std::sort(StoreNodes.begin(), StoreNodes.end(),
11429             [](MemOpLink LHS, MemOpLink RHS) {
11430     return LHS.OffsetFromBase < RHS.OffsetFromBase ||
11431            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
11432             LHS.SequenceNum < RHS.SequenceNum);
11433   });
11434 
11435   // Scan the memory operations on the chain and find the first non-consecutive
11436   // store memory address.
11437   unsigned LastConsecutiveStore = 0;
11438   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
11439   for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
11440 
11441     // Check that the addresses are consecutive starting from the second
11442     // element in the list of stores.
11443     if (i > 0) {
11444       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
11445       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11446         break;
11447     }
11448 
11449     // Check if this store interferes with any of the loads that we found.
11450     // If we find a load that alias with this store. Stop the sequence.
11451     if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
11452                     [&](LSBaseSDNode* Ldn) {
11453                       return isAlias(Ldn, StoreNodes[i].MemNode);
11454                     }))
11455       break;
11456 
11457     // Mark this node as useful.
11458     LastConsecutiveStore = i;
11459   }
11460 
11461   // The node with the lowest store address.
11462   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11463   unsigned FirstStoreAS = FirstInChain->getAddressSpace();
11464   unsigned FirstStoreAlign = FirstInChain->getAlignment();
11465   LLVMContext &Context = *DAG.getContext();
11466   const DataLayout &DL = DAG.getDataLayout();
11467 
11468   // Store the constants into memory as one consecutive store.
11469   if (IsConstantSrc) {
11470     unsigned LastLegalType = 0;
11471     unsigned LastLegalVectorType = 0;
11472     bool NonZero = false;
11473     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11474       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11475       SDValue StoredVal = St->getValue();
11476 
11477       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
11478         NonZero |= !C->isNullValue();
11479       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
11480         NonZero |= !C->getConstantFPValue()->isNullValue();
11481       } else {
11482         // Non-constant.
11483         break;
11484       }
11485 
11486       // Find a legal type for the constant store.
11487       unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11488       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11489       bool IsFast;
11490       if (TLI.isTypeLegal(StoreTy) &&
11491           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11492                                  FirstStoreAlign, &IsFast) && IsFast) {
11493         LastLegalType = i+1;
11494       // Or check whether a truncstore is legal.
11495       } else if (TLI.getTypeAction(Context, StoreTy) ==
11496                  TargetLowering::TypePromoteInteger) {
11497         EVT LegalizedStoredValueTy =
11498           TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
11499         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11500             TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11501                                    FirstStoreAS, FirstStoreAlign, &IsFast) &&
11502             IsFast) {
11503           LastLegalType = i + 1;
11504         }
11505       }
11506 
11507       // We only use vectors if the constant is known to be zero or the target
11508       // allows it and the function is not marked with the noimplicitfloat
11509       // attribute.
11510       if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
11511                                                         FirstStoreAS)) &&
11512           !NoVectors) {
11513         // Find a legal type for the vector store.
11514         EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
11515         if (TLI.isTypeLegal(Ty) &&
11516             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11517                                    FirstStoreAlign, &IsFast) && IsFast)
11518           LastLegalVectorType = i + 1;
11519       }
11520     }
11521 
11522     // Check if we found a legal integer type to store.
11523     if (LastLegalType == 0 && LastLegalVectorType == 0)
11524       return false;
11525 
11526     bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
11527     unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
11528 
11529     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
11530                                            true, UseVector);
11531   }
11532 
11533   // When extracting multiple vector elements, try to store them
11534   // in one vector store rather than a sequence of scalar stores.
11535   if (IsExtractVecSrc) {
11536     unsigned NumStoresToMerge = 0;
11537     bool IsVec = MemVT.isVector();
11538     for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
11539       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11540       unsigned StoreValOpcode = St->getValue().getOpcode();
11541       // This restriction could be loosened.
11542       // Bail out if any stored values are not elements extracted from a vector.
11543       // It should be possible to handle mixed sources, but load sources need
11544       // more careful handling (see the block of code below that handles
11545       // consecutive loads).
11546       if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
11547           StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
11548         return false;
11549 
11550       // Find a legal type for the vector store.
11551       unsigned Elts = i + 1;
11552       if (IsVec) {
11553         // When merging vector stores, get the total number of elements.
11554         Elts *= MemVT.getVectorNumElements();
11555       }
11556       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
11557       bool IsFast;
11558       if (TLI.isTypeLegal(Ty) &&
11559           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11560                                  FirstStoreAlign, &IsFast) && IsFast)
11561         NumStoresToMerge = i + 1;
11562     }
11563 
11564     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
11565                                            false, true);
11566   }
11567 
11568   // Below we handle the case of multiple consecutive stores that
11569   // come from multiple consecutive loads. We merge them into a single
11570   // wide load and a single wide store.
11571 
11572   // Look for load nodes which are used by the stored values.
11573   SmallVector<MemOpLink, 8> LoadNodes;
11574 
11575   // Find acceptable loads. Loads need to have the same chain (token factor),
11576   // must not be zext, volatile, indexed, and they must be consecutive.
11577   BaseIndexOffset LdBasePtr;
11578   for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11579     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11580     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
11581     if (!Ld) break;
11582 
11583     // Loads must only have one use.
11584     if (!Ld->hasNUsesOfValue(1, 0))
11585       break;
11586 
11587     // The memory operands must not be volatile.
11588     if (Ld->isVolatile() || Ld->isIndexed())
11589       break;
11590 
11591     // We do not accept ext loads.
11592     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
11593       break;
11594 
11595     // The stored memory type must be the same.
11596     if (Ld->getMemoryVT() != MemVT)
11597       break;
11598 
11599     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
11600     // If this is not the first ptr that we check.
11601     if (LdBasePtr.Base.getNode()) {
11602       // The base ptr must be the same.
11603       if (!LdPtr.equalBaseIndex(LdBasePtr))
11604         break;
11605     } else {
11606       // Check that all other base pointers are the same as this one.
11607       LdBasePtr = LdPtr;
11608     }
11609 
11610     // We found a potential memory operand to merge.
11611     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
11612   }
11613 
11614   if (LoadNodes.size() < 2)
11615     return false;
11616 
11617   // If we have load/store pair instructions and we only have two values,
11618   // don't bother.
11619   unsigned RequiredAlignment;
11620   if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
11621       St->getAlignment() >= RequiredAlignment)
11622     return false;
11623 
11624   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
11625   unsigned FirstLoadAS = FirstLoad->getAddressSpace();
11626   unsigned FirstLoadAlign = FirstLoad->getAlignment();
11627 
11628   // Scan the memory operations on the chain and find the first non-consecutive
11629   // load memory address. These variables hold the index in the store node
11630   // array.
11631   unsigned LastConsecutiveLoad = 0;
11632   // This variable refers to the size and not index in the array.
11633   unsigned LastLegalVectorType = 0;
11634   unsigned LastLegalIntegerType = 0;
11635   StartAddress = LoadNodes[0].OffsetFromBase;
11636   SDValue FirstChain = FirstLoad->getChain();
11637   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
11638     // All loads must share the same chain.
11639     if (LoadNodes[i].MemNode->getChain() != FirstChain)
11640       break;
11641 
11642     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
11643     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11644       break;
11645     LastConsecutiveLoad = i;
11646     // Find a legal type for the vector store.
11647     EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
11648     bool IsFastSt, IsFastLd;
11649     if (TLI.isTypeLegal(StoreTy) &&
11650         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11651                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11652         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11653                                FirstLoadAlign, &IsFastLd) && IsFastLd) {
11654       LastLegalVectorType = i + 1;
11655     }
11656 
11657     // Find a legal type for the integer store.
11658     unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11659     StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11660     if (TLI.isTypeLegal(StoreTy) &&
11661         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11662                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11663         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11664                                FirstLoadAlign, &IsFastLd) && IsFastLd)
11665       LastLegalIntegerType = i + 1;
11666     // Or check whether a truncstore and extload is legal.
11667     else if (TLI.getTypeAction(Context, StoreTy) ==
11668              TargetLowering::TypePromoteInteger) {
11669       EVT LegalizedStoredValueTy =
11670         TLI.getTypeToTransformTo(Context, StoreTy);
11671       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11672           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11673           TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11674           TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11675           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11676                                  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
11677           IsFastSt &&
11678           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11679                                  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
11680           IsFastLd)
11681         LastLegalIntegerType = i+1;
11682     }
11683   }
11684 
11685   // Only use vector types if the vector type is larger than the integer type.
11686   // If they are the same, use integers.
11687   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
11688   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
11689 
11690   // We add +1 here because the LastXXX variables refer to location while
11691   // the NumElem refers to array/index size.
11692   unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
11693   NumElem = std::min(LastLegalType, NumElem);
11694 
11695   if (NumElem < 2)
11696     return false;
11697 
11698   // Collect the chains from all merged stores.
11699   SmallVector<SDValue, 8> MergeStoreChains;
11700   MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
11701 
11702   // The latest Node in the DAG.
11703   unsigned LatestNodeUsed = 0;
11704   for (unsigned i=1; i<NumElem; ++i) {
11705     // Find a chain for the new wide-store operand. Notice that some
11706     // of the store nodes that we found may not be selected for inclusion
11707     // in the wide store. The chain we use needs to be the chain of the
11708     // latest store node which is *used* and replaced by the wide store.
11709     if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11710       LatestNodeUsed = i;
11711 
11712     MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
11713   }
11714 
11715   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
11716 
11717   // Find if it is better to use vectors or integers to load and store
11718   // to memory.
11719   EVT JointMemOpVT;
11720   if (UseVectorTy) {
11721     JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
11722   } else {
11723     unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
11724     JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
11725   }
11726 
11727   SDLoc LoadDL(LoadNodes[0].MemNode);
11728   SDLoc StoreDL(StoreNodes[0].MemNode);
11729 
11730   // The merged loads are required to have the same incoming chain, so
11731   // using the first's chain is acceptable.
11732   SDValue NewLoad = DAG.getLoad(
11733       JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
11734       FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
11735 
11736   SDValue NewStoreChain =
11737     DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
11738 
11739   SDValue NewStore = DAG.getStore(
11740     NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
11741       FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
11742 
11743   // Transfer chain users from old loads to the new load.
11744   for (unsigned i = 0; i < NumElem; ++i) {
11745     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
11746     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
11747                                   SDValue(NewLoad.getNode(), 1));
11748   }
11749 
11750   // Replace the last store with the new store.
11751   CombineTo(LatestOp, NewStore);
11752   // Erase all other stores.
11753   for (unsigned i = 0; i < NumElem ; ++i) {
11754     // Remove all Store nodes.
11755     if (StoreNodes[i].MemNode == LatestOp)
11756       continue;
11757     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11758     DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
11759     deleteAndRecombine(St);
11760   }
11761 
11762   return true;
11763 }
11764 
11765 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
11766   SDLoc SL(ST);
11767   SDValue ReplStore;
11768 
11769   // Replace the chain to avoid dependency.
11770   if (ST->isTruncatingStore()) {
11771     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
11772                                   ST->getBasePtr(), ST->getMemoryVT(),
11773                                   ST->getMemOperand());
11774   } else {
11775     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
11776                              ST->getMemOperand());
11777   }
11778 
11779   // Create token to keep both nodes around.
11780   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
11781                               MVT::Other, ST->getChain(), ReplStore);
11782 
11783   // Make sure the new and old chains are cleaned up.
11784   AddToWorklist(Token.getNode());
11785 
11786   // Don't add users to work list.
11787   return CombineTo(ST, Token, false);
11788 }
11789 
11790 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
11791   SDValue Value = ST->getValue();
11792   if (Value.getOpcode() == ISD::TargetConstantFP)
11793     return SDValue();
11794 
11795   SDLoc DL(ST);
11796 
11797   SDValue Chain = ST->getChain();
11798   SDValue Ptr = ST->getBasePtr();
11799 
11800   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
11801 
11802   // NOTE: If the original store is volatile, this transform must not increase
11803   // the number of stores.  For example, on x86-32 an f64 can be stored in one
11804   // processor operation but an i64 (which is not legal) requires two.  So the
11805   // transform should not be done in this case.
11806 
11807   SDValue Tmp;
11808   switch (CFP->getSimpleValueType(0).SimpleTy) {
11809   default:
11810     llvm_unreachable("Unknown FP type");
11811   case MVT::f16:    // We don't do this for these yet.
11812   case MVT::f80:
11813   case MVT::f128:
11814   case MVT::ppcf128:
11815     return SDValue();
11816   case MVT::f32:
11817     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
11818         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
11819       ;
11820       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
11821                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
11822                             MVT::i32);
11823       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
11824     }
11825 
11826     return SDValue();
11827   case MVT::f64:
11828     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
11829          !ST->isVolatile()) ||
11830         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
11831       ;
11832       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
11833                             getZExtValue(), SDLoc(CFP), MVT::i64);
11834       return DAG.getStore(Chain, DL, Tmp,
11835                           Ptr, ST->getMemOperand());
11836     }
11837 
11838     if (!ST->isVolatile() &&
11839         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
11840       // Many FP stores are not made apparent until after legalize, e.g. for
11841       // argument passing.  Since this is so common, custom legalize the
11842       // 64-bit integer store into two 32-bit stores.
11843       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
11844       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
11845       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
11846       if (DAG.getDataLayout().isBigEndian())
11847         std::swap(Lo, Hi);
11848 
11849       unsigned Alignment = ST->getAlignment();
11850       bool isVolatile = ST->isVolatile();
11851       bool isNonTemporal = ST->isNonTemporal();
11852       AAMDNodes AAInfo = ST->getAAInfo();
11853 
11854       SDValue St0 = DAG.getStore(Chain, DL, Lo,
11855                                  Ptr, ST->getPointerInfo(),
11856                                  isVolatile, isNonTemporal,
11857                                  ST->getAlignment(), AAInfo);
11858       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
11859                         DAG.getConstant(4, DL, Ptr.getValueType()));
11860       Alignment = MinAlign(Alignment, 4U);
11861       SDValue St1 = DAG.getStore(Chain, DL, Hi,
11862                                  Ptr, ST->getPointerInfo().getWithOffset(4),
11863                                  isVolatile, isNonTemporal,
11864                                  Alignment, AAInfo);
11865       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11866                          St0, St1);
11867     }
11868 
11869     return SDValue();
11870   }
11871 }
11872 
/// Combine a STORE node.  Applies, in order: store-of-bitcast folding,
/// dead-store elimination (undef value, store-after-load, store-after-store),
/// alignment inference, FP->int load/store pairing, chain improvement via
/// alias analysis, indexed-store formation, truncstore narrowing, truncstore
/// folding of FP_ROUND/TRUNCATE, consecutive-store merging, and FP-constant
/// store replacement.  The order matters: e.g. merging runs before the
/// FP-constant rewrite so type changes don't block merging (see below).
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = DAG.getDataLayout().getABITypeAlignment(
        SVT.getTypeForEVT(*DAG.getContext()));
    // Only fold when the source-typed store is legal (or we are still
    // pre-legalization and the store is not volatile).
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign,
                          ST->getAAInfo());
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        SDValue NewStore =
               DAG.getTruncStore(Chain, SDLoc(N), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align,
                                 ST->getAAInfo());
        // getTruncStore may CSE to an existing node; only combine when a
        // genuinely new node was created.
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  // The -combiner-alias-analysis flag overrides the subtarget preference
  // when it was given explicitly on the command line.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  // Debug aid: restrict combiner-AA to a single named function.
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // FIXME: We should do this even without AA enabled. AA will just allow
    // FindBetterChain to work in more situations. The problem with this is that
    // any combine that expects memory operations to be on consecutive chains
    // first needs to be updated to look for users of the same chain.

    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store followed by a store with the same value to the same
  // location, then the store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      // Note: a merge can delete ST itself, in which case its opcode becomes
      // DELETED_NODE and the loop terminates.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(Value)) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  return ReduceLoadOpStoreWidth(N);
}
12035 
12036 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
12037   SDValue InVec = N->getOperand(0);
12038   SDValue InVal = N->getOperand(1);
12039   SDValue EltNo = N->getOperand(2);
12040   SDLoc dl(N);
12041 
12042   // If the inserted element is an UNDEF, just use the input vector.
12043   if (InVal.isUndef())
12044     return InVec;
12045 
12046   EVT VT = InVec.getValueType();
12047 
12048   // If we can't generate a legal BUILD_VECTOR, exit
12049   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
12050     return SDValue();
12051 
12052   // Check that we know which element is being inserted
12053   if (!isa<ConstantSDNode>(EltNo))
12054     return SDValue();
12055   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12056 
12057   // Canonicalize insert_vector_elt dag nodes.
12058   // Example:
12059   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
12060   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
12061   //
12062   // Do this only if the child insert_vector node has one use; also
12063   // do this only if indices are both constants and Idx1 < Idx0.
12064   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
12065       && isa<ConstantSDNode>(InVec.getOperand(2))) {
12066     unsigned OtherElt =
12067       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
12068     if (Elt < OtherElt) {
12069       // Swap nodes.
12070       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
12071                                   InVec.getOperand(0), InVal, EltNo);
12072       AddToWorklist(NewOp.getNode());
12073       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
12074                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
12075     }
12076   }
12077 
12078   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
12079   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
12080   // vector elements.
12081   SmallVector<SDValue, 8> Ops;
12082   // Do not combine these two vectors if the output vector will not replace
12083   // the input vector.
12084   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
12085     Ops.append(InVec.getNode()->op_begin(),
12086                InVec.getNode()->op_end());
12087   } else if (InVec.isUndef()) {
12088     unsigned NElts = VT.getVectorNumElements();
12089     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
12090   } else {
12091     return SDValue();
12092   }
12093 
12094   // Insert the element
12095   if (Elt < Ops.size()) {
12096     // All the operands of BUILD_VECTOR must have the same type;
12097     // we enforce that here.
12098     EVT OpVT = Ops[0].getValueType();
12099     if (InVal.getValueType() != OpVT)
12100       InVal = OpVT.bitsGT(InVal.getValueType()) ?
12101                 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
12102                 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
12103     Ops[Elt] = InVal;
12104   }
12105 
12106   // Return the new vector
12107   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
12108 }
12109 
/// Replace (extract_vector_elt (load InVecVT), EltNo) with a narrow scalar
/// load of just the extracted element, when the element-typed load is legal
/// and doesn't require higher alignment than the original load provides.
/// Both the extract (EVE) and the original load's chain result are RAUW'd,
/// so this assumes the extract is the only value use of the load.
/// Returns SDValue(EVE, 0) on success, empty SDValue otherwise.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Give up if the scalar load would need more alignment than we have, or if
  // the target can't do an element-typed load at all.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the extracted element: base + index * elt size.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: emit the multiply; pointer info can't carry the offset.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-size or narrower result: plain load, then truncate or bitcast to
    // the extract's result type.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  // Replace the extract's value and the old load's chain in one shot.
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
12186 
12187 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
12188   // (vextract (scalar_to_vector val, 0) -> val
12189   SDValue InVec = N->getOperand(0);
12190   EVT VT = InVec.getValueType();
12191   EVT NVT = N->getValueType(0);
12192 
12193   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
12194     // Check if the result type doesn't match the inserted element type. A
12195     // SCALAR_TO_VECTOR may truncate the inserted element and the
12196     // EXTRACT_VECTOR_ELT may widen the extracted vector.
12197     SDValue InOp = InVec.getOperand(0);
12198     if (InOp.getValueType() != NVT) {
12199       assert(InOp.getValueType().isInteger() && NVT.isInteger());
12200       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
12201     }
12202     return InOp;
12203   }
12204 
12205   SDValue EltNo = N->getOperand(1);
12206   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
12207 
12208   // extract_vector_elt (build_vector x, y), 1 -> y
12209   if (ConstEltNo &&
12210       InVec.getOpcode() == ISD::BUILD_VECTOR &&
12211       TLI.isTypeLegal(VT) &&
12212       (InVec.hasOneUse() ||
12213        TLI.aggressivelyPreferBuildVectorSources(VT))) {
12214     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
12215     EVT InEltVT = Elt.getValueType();
12216 
12217     // Sometimes build_vector's scalar input types do not match result type.
12218     if (NVT == InEltVT)
12219       return Elt;
12220 
12221     // TODO: It may be useful to truncate if free if the build_vector implicitly
12222     // converts.
12223   }
12224 
12225   // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
12226   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
12227       ConstEltNo->isNullValue() && VT.isInteger()) {
12228     SDValue BCSrc = InVec.getOperand(0);
12229     if (BCSrc.getValueType().isScalarInteger())
12230       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
12231   }
12232 
12233   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
12234   // We only perform this optimization before the op legalization phase because
12235   // we may introduce new vector instructions which are not backed by TD
12236   // patterns. For example on AVX, extracting elements from a wide vector
12237   // without using extract_subvector. However, if we can find an underlying
12238   // scalar value, then we can always use that.
12239   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
12240     int NumElem = VT.getVectorNumElements();
12241     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
12242     // Find the new index to extract from.
12243     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
12244 
12245     // Extracting an undef index is undef.
12246     if (OrigElt == -1)
12247       return DAG.getUNDEF(NVT);
12248 
12249     // Select the right vector half to extract from.
12250     SDValue SVInVec;
12251     if (OrigElt < NumElem) {
12252       SVInVec = InVec->getOperand(0);
12253     } else {
12254       SVInVec = InVec->getOperand(1);
12255       OrigElt -= NumElem;
12256     }
12257 
12258     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
12259       SDValue InOp = SVInVec.getOperand(OrigElt);
12260       if (InOp.getValueType() != NVT) {
12261         assert(InOp.getValueType().isInteger() && NVT.isInteger());
12262         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
12263       }
12264 
12265       return InOp;
12266     }
12267 
12268     // FIXME: We should handle recursing on other vector shuffles and
12269     // scalar_to_vector here as well.
12270 
12271     if (!LegalOperations) {
12272       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
12273       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
12274                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
12275     }
12276   }
12277 
12278   bool BCNumEltsChanged = false;
12279   EVT ExtVT = VT.getVectorElementType();
12280   EVT LVT = ExtVT;
12281 
12282   // If the result of load has to be truncated, then it's not necessarily
12283   // profitable.
12284   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
12285     return SDValue();
12286 
12287   if (InVec.getOpcode() == ISD::BITCAST) {
12288     // Don't duplicate a load with other uses.
12289     if (!InVec.hasOneUse())
12290       return SDValue();
12291 
12292     EVT BCVT = InVec.getOperand(0).getValueType();
12293     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
12294       return SDValue();
12295     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
12296       BCNumEltsChanged = true;
12297     InVec = InVec.getOperand(0);
12298     ExtVT = BCVT.getVectorElementType();
12299   }
12300 
12301   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
12302   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
12303       ISD::isNormalLoad(InVec.getNode()) &&
12304       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
12305     SDValue Index = N->getOperand(1);
12306     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
12307       return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
12308                                                            OrigLoad);
12309   }
12310 
12311   // Perform only after legalization to ensure build_vector / vector_shuffle
12312   // optimizations have already been done.
12313   if (!LegalOperations) return SDValue();
12314 
12315   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
12316   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
12317   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
12318 
12319   if (ConstEltNo) {
12320     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12321 
12322     LoadSDNode *LN0 = nullptr;
12323     const ShuffleVectorSDNode *SVN = nullptr;
12324     if (ISD::isNormalLoad(InVec.getNode())) {
12325       LN0 = cast<LoadSDNode>(InVec);
12326     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
12327                InVec.getOperand(0).getValueType() == ExtVT &&
12328                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
12329       // Don't duplicate a load with other uses.
12330       if (!InVec.hasOneUse())
12331         return SDValue();
12332 
12333       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
12334     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
12335       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
12336       // =>
12337       // (load $addr+1*size)
12338 
12339       // Don't duplicate a load with other uses.
12340       if (!InVec.hasOneUse())
12341         return SDValue();
12342 
12343       // If the bit convert changed the number of elements, it is unsafe
12344       // to examine the mask.
12345       if (BCNumEltsChanged)
12346         return SDValue();
12347 
12348       // Select the input vector, guarding against out of range extract vector.
12349       unsigned NumElems = VT.getVectorNumElements();
12350       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
12351       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
12352 
12353       if (InVec.getOpcode() == ISD::BITCAST) {
12354         // Don't duplicate a load with other uses.
12355         if (!InVec.hasOneUse())
12356           return SDValue();
12357 
12358         InVec = InVec.getOperand(0);
12359       }
12360       if (ISD::isNormalLoad(InVec.getNode())) {
12361         LN0 = cast<LoadSDNode>(InVec);
12362         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
12363         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
12364       }
12365     }
12366 
12367     // Make sure we found a non-volatile load and the extractelement is
12368     // the only use.
12369     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
12370       return SDValue();
12371 
12372     // If Idx was -1 above, Elt is going to be -1, so just return undef.
12373     if (Elt == -1)
12374       return DAG.getUNDEF(LVT);
12375 
12376     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
12377   }
12378 
12379   return SDValue();
12380 }
12381 
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  // First pass: verify every defined operand is an ANY/ZERO_EXTEND from one
  // common source type. SourceType == MVT::Other afterwards means "no".
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  // Each output element maps onto ElemRatio lanes of the narrower source
  // type once the result is bitcast.
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Padding lanes of each extended element: undef if every input was an
  // ANY_EXTEND (high bits unspecified), otherwise zero for ZERO_EXTEND.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SDLoc(N), SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // Place the source value in the low-order lane of its ElemRatio-lane
    // group: the first lane on little-endian, the last on big-endian.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
12491 
12492 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
12493   EVT VT = N->getValueType(0);
12494 
12495   unsigned NumInScalars = N->getNumOperands();
12496   SDLoc dl(N);
12497 
12498   EVT SrcVT = MVT::Other;
12499   unsigned Opcode = ISD::DELETED_NODE;
12500   unsigned NumDefs = 0;
12501 
12502   for (unsigned i = 0; i != NumInScalars; ++i) {
12503     SDValue In = N->getOperand(i);
12504     unsigned Opc = In.getOpcode();
12505 
12506     if (Opc == ISD::UNDEF)
12507       continue;
12508 
12509     // If all scalar values are floats and converted from integers.
12510     if (Opcode == ISD::DELETED_NODE &&
12511         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
12512       Opcode = Opc;
12513     }
12514 
12515     if (Opc != Opcode)
12516       return SDValue();
12517 
12518     EVT InVT = In.getOperand(0).getValueType();
12519 
12520     // If all scalar values are typed differently, bail out. It's chosen to
12521     // simplify BUILD_VECTOR of integer types.
12522     if (SrcVT == MVT::Other)
12523       SrcVT = InVT;
12524     if (SrcVT != InVT)
12525       return SDValue();
12526     NumDefs++;
12527   }
12528 
12529   // If the vector has just one element defined, it's not worth to fold it into
12530   // a vectorized one.
12531   if (NumDefs < 2)
12532     return SDValue();
12533 
12534   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
12535          && "Should only handle conversion from integer to float.");
12536   assert(SrcVT != MVT::Other && "Cannot determine source type!");
12537 
12538   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
12539 
12540   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
12541     return SDValue();
12542 
12543   // Just because the floating-point vector type is legal does not necessarily
12544   // mean that the corresponding integer vector type is.
12545   if (!isTypeLegal(NVT))
12546     return SDValue();
12547 
12548   SmallVector<SDValue, 8> Opnds;
12549   for (unsigned i = 0; i != NumInScalars; ++i) {
12550     SDValue In = N->getOperand(i);
12551 
12552     if (In.isUndef())
12553       Opnds.push_back(DAG.getUNDEF(SrcVT));
12554     else
12555       Opnds.push_back(In.getOperand(0));
12556   }
12557   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
12558   AddToWorklist(BV.getNode());
12559 
12560   return DAG.getNode(Opcode, dl, VT, BV);
12561 }
12562 
// Combine a BUILD_VECTOR: fold all-undef, try the two reduce* rewrites,
// then try to turn a BUILD_VECTOR of EXTRACT_VECTOR_ELTs (from at most two
// source vectors, optionally blended with zero) into a VECTOR_SHUFFLE.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // (build_vec (ext x), ...) -> (bitcast (build_vec x, ...)).
  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  // (build_vec ([su]int_to_fp x), ...) -> ([su]int_to_fp (build_vec x, ...)).
  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  // First pass: record up to two distinct vectors that the extracts pull
  // from. Zero constants are also tolerated (they become a blend with a
  // zero vector), but only while the second input slot is still free,
  // because the zero vector will occupy it.
  SDValue VecIn1, VecIn2;
  bool UsesZeroVector = false;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue Op = N->getOperand(i);
    // Ignore undef inputs.
    if (Op.isUndef()) continue;

    // See if we can combine this build_vector into a blend with a zero vector.
    if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
      UsesZeroVector = true;
      continue;
    }

    // If this input is something other than a EXTRACT_VECTOR_ELT with a
    // constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1))) {
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }

    // We allow up to two distinct input vectors.
    SDValue ExtractedFromVec = Op.getOperand(0);
    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
      continue;

    if (!VecIn1.getNode()) {
      VecIn1 = ExtractedFromVec;
    } else if (!VecIn2.getNode() && !UsesZeroVector) {
      VecIn2 = ExtractedFromVec;
    } else {
      // Too many inputs.
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }
  }

  // If everything is good, we can make a shuffle operation.
  if (VecIn1.getNode()) {
    unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
    // Second pass: build the shuffle mask. VecIn1 elements use their extract
    // index directly; VecIn2 (or zero-vector) elements are offset past the
    // first input's element count.
    SmallVector<int, 8> Mask;
    for (unsigned i = 0; i != NumInScalars; ++i) {
      unsigned Opcode = N->getOperand(i).getOpcode();
      if (Opcode == ISD::UNDEF) {
        Mask.push_back(-1);
        continue;
      }

      // Operands can also be zero.
      if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
        assert(UsesZeroVector &&
               (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
               "Unexpected node found!");
        Mask.push_back(NumInScalars+i);
        continue;
      }

      // If extracting from the first vector, just use the index directly.
      SDValue Extract = N->getOperand(i);
      SDValue ExtVal = Extract.getOperand(1);
      unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
      if (Extract.getOperand(0) == VecIn1) {
        Mask.push_back(ExtIndex);
        continue;
      }

      // Otherwise, use InIdx + InputVecSize
      Mask.push_back(InNumElements + ExtIndex);
    }

    // Avoid introducing illegal shuffles with zero.
    if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
      return SDValue();

    // We can't generate a shuffle node with mismatched input and output types.
    // Attempt to transform a single input vector to the correct type.
    if ((VT != VecIn1.getValueType())) {
      // If the input vector type has a different base type to the output
      // vector type, bail out.
      EVT VTElemType = VT.getVectorElementType();
      if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
          (VecIn2.getNode() &&
           (VecIn2.getValueType().getVectorElementType() != VTElemType)))
        return SDValue();

      // If the input vector is too small, widen it.
      // We only support widening of vectors which are half the size of the
      // output registers. For example XMM->YMM widening on X86 with AVX.
      EVT VecInT = VecIn1.getValueType();
      if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
        // If we only have one small input, widen it by adding undef values.
        if (!VecIn2.getNode())
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
                               DAG.getUNDEF(VecIn1.getValueType()));
        else if (VecIn1.getValueType() == VecIn2.getValueType()) {
          // If we have two small inputs of the same type, try to concat them.
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
          VecIn2 = SDValue(nullptr, 0);
        } else
          return SDValue();
      } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
        // If the input vector is too large, try to split it.
        // We don't support having two input vectors that are too large.
        // If the zero vector was used, we can not split the vector,
        // since we'd need 3 inputs.
        if (UsesZeroVector || VecIn2.getNode())
          return SDValue();

        if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
          return SDValue();

        // Try to replace VecIn1 with two extract_subvectors
        // No need to update the masks, they should still be correct.
        VecIn2 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(VT.getVectorNumElements(), dl,
                            TLI.getVectorIdxTy(DAG.getDataLayout())));
        VecIn1 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else
        return SDValue();
    }

    // Materialize the zero vector as the second shuffle input if any element
    // was a zero constant.
    if (UsesZeroVector)
      VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
                                DAG.getConstantFP(0.0, dl, VT);
    else
      // If VecIn2 is unused then change it to undef.
      VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);

    // Check that we were able to transform all incoming values to the same
    // type.
    if (VecIn2.getValueType() != VecIn1.getValueType() ||
        VecIn1.getValueType() != VT)
          return SDValue();

    // Return the new VECTOR_SHUFFLE node.
    SDValue Ops[2];
    Ops[0] = VecIn1;
    Ops[1] = VecIn2;
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
  }

  return SDValue();
}
12736 
12737 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
12738   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12739   EVT OpVT = N->getOperand(0).getValueType();
12740 
12741   // If the operands are legal vectors, leave them alone.
12742   if (TLI.isTypeLegal(OpVT))
12743     return SDValue();
12744 
12745   SDLoc DL(N);
12746   EVT VT = N->getValueType(0);
12747   SmallVector<SDValue, 8> Ops;
12748 
12749   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
12750   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
12751 
12752   // Keep track of what we encounter.
12753   bool AnyInteger = false;
12754   bool AnyFP = false;
12755   for (const SDValue &Op : N->ops()) {
12756     if (ISD::BITCAST == Op.getOpcode() &&
12757         !Op.getOperand(0).getValueType().isVector())
12758       Ops.push_back(Op.getOperand(0));
12759     else if (ISD::UNDEF == Op.getOpcode())
12760       Ops.push_back(ScalarUndef);
12761     else
12762       return SDValue();
12763 
12764     // Note whether we encounter an integer or floating point scalar.
12765     // If it's neither, bail out, it could be something weird like x86mmx.
12766     EVT LastOpVT = Ops.back().getValueType();
12767     if (LastOpVT.isFloatingPoint())
12768       AnyFP = true;
12769     else if (LastOpVT.isInteger())
12770       AnyInteger = true;
12771     else
12772       return SDValue();
12773   }
12774 
12775   // If any of the operands is a floating point scalar bitcast to a vector,
12776   // use floating point types throughout, and bitcast everything.
12777   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
12778   if (AnyFP) {
12779     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
12780     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
12781     if (AnyInteger) {
12782       for (SDValue &Op : Ops) {
12783         if (Op.getValueType() == SVT)
12784           continue;
12785         if (Op.isUndef())
12786           Op = ScalarUndef;
12787         else
12788           Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
12789       }
12790     }
12791   }
12792 
12793   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
12794                                VT.getSizeInBits() / SVT.getSizeInBits());
12795   return DAG.getNode(ISD::BITCAST, DL, VT,
12796                      DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
12797 }
12798 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 hold the (at most two) source vectors; Mask is accumulated in
  // units of the result type's elements, NumOpElts entries per operand.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    while (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    while (ExtVec.getOpcode() == ISD::BITCAST)
      ExtVec = ExtVec.getOperand(0);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The subvector start index must be constant to be mapped into a mask.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    // ExtIdx is in units of ExtVT's elements; convert it to units of the
    // result type's elements, bailing out if the ratio isn't integral.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only form the shuffle if the target considers the mask legal.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
12881 
// Combine a CONCAT_VECTORS: trivial/undef folds, scalar_to_vector
// formation, merging of BUILD_VECTOR operands, scalar/extract folds, and
// removal of identity concats of extract_subvectors.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Build a vector of the scalar type with the same total width as the
      // result, then bitcast back to the concat's result type.
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
                                 VT.getSizeInBits() / SclTy.getSizeInBits());
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDLoc dl = SDLoc(N);
      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
      return DAG.getNode(ISD::BITCAST, dl, VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  bool AllBuildVectorsOrUndefs =
      std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
  if (AllBuildVectorsOrUndefs) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand contributes one undef scalar per vector lane.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer scalars may need truncating down to the common MinVT.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i must extract from exactly the offset where the concat would
    // place it, otherwise this is not an identity copy of SingleSource.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
13035 
/// Combine an EXTRACT_SUBVECTOR node:
///  * extracting a whole operand of a CONCAT_VECTORS -> that operand;
///  * extracting from an INSERT_SUBVECTOR (possibly through a bitcast) at the
///    same bit offset as the insertion -> the inserted subvector, otherwise
///    re-target the extract at the base vector.
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine:
    //    (extract_subvec (concat V1, V2, ...), i)
    // Into:
    //    Vi if possible
    // Only operand 0 is checked as 'concat' assumes all inputs of the same
    // type.
    if (V->getOperand(0).getValueType() != NVT)
      return SDValue();
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  // Skip bitcasting
  if (V->getOpcode() == ISD::BITCAST)
    V = V.getOperand(0);

  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    SDLoc dl(N);
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same type, and are half size of larger vectors.
    EVT BigVT = V->getOperand(0).getValueType();
    EVT SmallVT = V->getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
      return SDValue();

    // Only handle cases where both indexes are constants with the same type.
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx &&
        InsIdx->getValueType(0).getSizeInBits() <= 64 &&
        ExtIdx->getValueType(0).getSizeInBits() <= 64) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    bit offsets of the two indices are equal => V2 (the inserted
      //    subvector, bitcast to the extract's type)
      //    otherwise => (extract_subvec V1, ExtIdx)
      // The comparison is done in bits because we may have looked through a
      // bitcast above, so the two element types can differ.
      if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
        return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
                         DAG.getNode(ISD::BITCAST, dl,
                                     N->getOperand(0).getValueType(),
                                     V->getOperand(0)), N->getOperand(1));
    }
  }

  return SDValue();
}
13093 
13094 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
13095                                                  SDValue V, SelectionDAG &DAG) {
13096   SDLoc DL(V);
13097   EVT VT = V.getValueType();
13098 
13099   switch (V.getOpcode()) {
13100   default:
13101     return V;
13102 
13103   case ISD::CONCAT_VECTORS: {
13104     EVT OpVT = V->getOperand(0).getValueType();
13105     int OpSize = OpVT.getVectorNumElements();
13106     SmallBitVector OpUsedElements(OpSize, false);
13107     bool FoundSimplification = false;
13108     SmallVector<SDValue, 4> NewOps;
13109     NewOps.reserve(V->getNumOperands());
13110     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
13111       SDValue Op = V->getOperand(i);
13112       bool OpUsed = false;
13113       for (int j = 0; j < OpSize; ++j)
13114         if (UsedElements[i * OpSize + j]) {
13115           OpUsedElements[j] = true;
13116           OpUsed = true;
13117         }
13118       NewOps.push_back(
13119           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
13120                  : DAG.getUNDEF(OpVT));
13121       FoundSimplification |= Op == NewOps.back();
13122       OpUsedElements.reset();
13123     }
13124     if (FoundSimplification)
13125       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
13126     return V;
13127   }
13128 
13129   case ISD::INSERT_SUBVECTOR: {
13130     SDValue BaseV = V->getOperand(0);
13131     SDValue SubV = V->getOperand(1);
13132     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
13133     if (!IdxN)
13134       return V;
13135 
13136     int SubSize = SubV.getValueType().getVectorNumElements();
13137     int Idx = IdxN->getZExtValue();
13138     bool SubVectorUsed = false;
13139     SmallBitVector SubUsedElements(SubSize, false);
13140     for (int i = 0; i < SubSize; ++i)
13141       if (UsedElements[i + Idx]) {
13142         SubVectorUsed = true;
13143         SubUsedElements[i] = true;
13144         UsedElements[i + Idx] = false;
13145       }
13146 
13147     // Now recurse on both the base and sub vectors.
13148     SDValue SimplifiedSubV =
13149         SubVectorUsed
13150             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
13151             : DAG.getUNDEF(SubV.getValueType());
13152     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
13153     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
13154       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
13155                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
13156     return V;
13157   }
13158   }
13159 }
13160 
13161 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
13162                                        SDValue N1, SelectionDAG &DAG) {
13163   EVT VT = SVN->getValueType(0);
13164   int NumElts = VT.getVectorNumElements();
13165   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
13166   for (int M : SVN->getMask())
13167     if (M >= 0 && M < NumElts)
13168       N0UsedElements[M] = true;
13169     else if (M >= NumElts)
13170       N1UsedElements[M - NumElts] = true;
13171 
13172   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
13173   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
13174   if (S0 == N0 && S1 == N1)
13175     return SDValue();
13176 
13177   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
13178 }
13179 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
//
// Succeeds only when every subvector-sized chunk of the shuffle mask is
// either entirely undef or an exact, aligned copy of one concat operand;
// any chunk that mixes undef and real indices aborts the combine.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The chunk must start at a subvector boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and consist of consecutive indices, i.e. an exact subvector copy.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Pick the concat operand (from N0 or N1) that this chunk copies.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
13243 
/// Combine a VECTOR_SHUFFLE node. The transforms run in order:
///  - canonicalizations: undef/duplicated operands, dropping RHS references
///    when the RHS is undef, commuting so an inner shuffle lands in operand 0;
///  - splat simplifications, looking through element-preserving bitcasts and
///    into BUILD_VECTOR operands;
///  - replacing unused insertions in the operand chains with undef;
///  - shuffle-of-concat, shuffle-of-scalar-sources, shuffle-of-bitcasted-
///    shuffle, and shuffle-of-shuffle merges.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    // Both halves of the mask index the same vector, so fold RHS indices
    // down into LHS indices.
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef()) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= 0) {
        if (Idx >= (int)NumElts)
          Idx -= NumElts;
        else
          Idx = -1; // remove reference to lhs
      }
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  // The index bound restricts the splat source to the LHS operand.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef element; it is the candidate splat value.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
                                  V->getValueType(0), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
      return NewBV;
    }
  }

  // There are various patterns used to build up a vector from smaller vectors,
  // subvectors, or elements. Scan chains of these and replace unused insertions
  // or components with undef.
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
    return S;

  // shuffle of concats: both operands must be concats of the same
  // subvector type (or RHS undef) for partitionShuffleOfConcats to apply.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
    SmallVector<SDValue, 8> Ops;
    for (int M : SVN->getMask()) {
      SDValue Op = DAG.getUNDEF(VT.getScalarType());
      if (M >= 0) {
        int Idx = M % NumElts;
        SDValue &S = (M < (int)NumElts ? N0 : N1);
        if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
          Op = S.getOperand(Idx);
        } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
          // Only lane 0 of a SCALAR_TO_VECTOR has a known scalar; other
          // lanes stay undef.
          if (Idx == 0)
            Op = S.getOperand(0);
        } else {
          // Operand can't be combined - bail out.
          break;
        }
      }
      Ops.push_back(Op);
    }
    // Size mismatch means the loop above bailed out early on some element.
    if (Ops.size() == VT.getVectorNumElements()) {
      // BUILD_VECTOR requires all inputs to be of the same type, find the
      // maximum type and extend them all.
      EVT SVT = VT.getScalarType();
      if (SVT.isInteger())
        for (SDValue &Op : Ops)
          SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
      if (SVT != VT.getScalarType())
        for (SDValue &Op : Ops)
          Op = TLI.isZExtFree(Op.getValueType(), SVT)
                   ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
                   : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
    }
  }

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Expand each mask element into Scale consecutive sub-element indices
    // so two masks with different element sizes can be compared/merged.
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {

        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
          SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
          return DAG.getNode(
              ISD::BITCAST, SDLoc(N), VT,
              DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
  }

  return SDValue();
}
13612 
13613 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
13614   SDValue InVal = N->getOperand(0);
13615   EVT VT = N->getValueType(0);
13616 
13617   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
13618   // with a VECTOR_SHUFFLE.
13619   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13620     SDValue InVec = InVal->getOperand(0);
13621     SDValue EltNo = InVal->getOperand(1);
13622 
13623     // FIXME: We could support implicit truncation if the shuffle can be
13624     // scaled to a smaller vector scalar type.
13625     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
13626     if (C0 && VT == InVec.getValueType() &&
13627         VT.getScalarType() == InVal.getValueType()) {
13628       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
13629       int Elt = C0->getZExtValue();
13630       NewMask[0] = Elt;
13631 
13632       if (TLI.isShuffleMaskLegal(NewMask, VT))
13633         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
13634                                     NewMask);
13635     }
13636   }
13637 
13638   return SDValue();
13639 }
13640 
13641 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
13642   SDValue N0 = N->getOperand(0);
13643   SDValue N2 = N->getOperand(2);
13644 
13645   // If the input vector is a concatenation, and the insert replaces
13646   // one of the halves, we can optimize into a single concat_vectors.
13647   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
13648       N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
13649     APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
13650     EVT VT = N->getValueType(0);
13651 
13652     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
13653     // (concat_vectors Z, Y)
13654     if (InsIdx == 0)
13655       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
13656                          N->getOperand(1), N0.getOperand(1));
13657 
13658     // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
13659     // (concat_vectors X, Z)
13660     if (InsIdx == VT.getVectorNumElements()/2)
13661       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
13662                          N0.getOperand(0), N->getOperand(1));
13663   }
13664 
13665   return SDValue();
13666 }
13667 
13668 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
13669   SDValue N0 = N->getOperand(0);
13670 
13671   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
13672   if (N0->getOpcode() == ISD::FP16_TO_FP)
13673     return N0->getOperand(0);
13674 
13675   return SDValue();
13676 }
13677 
13678 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
13679   SDValue N0 = N->getOperand(0);
13680 
13681   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
13682   if (N0->getOpcode() == ISD::AND) {
13683     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
13684     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
13685       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
13686                          N0.getOperand(0));
13687     }
13688   }
13689 
13690   return SDValue();
13691 }
13692 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
/// Each constant-mask element (or byte-sized chunk of it) must be all-ones or
/// all-zeros for the transform to apply.
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc dl(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  // Look through a bitcast on the mask operand - the constants are examined
  // bit-wise below, so the cast does not matter.
  if (RHS.getOpcode() == ISD::BITCAST)
    RHS = RHS.getOperand(0);

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // Split == 1 tests whole elements; larger splits test progressively
  // smaller sub-element chunks, down to single bytes.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // Big-endian targets number sub-elements from the high end.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits = Bits.lshr(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the LHS sub-element; all-zeros selects from the zero
      // vector (the shuffle's second operand). Anything else disqualifies.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, &Indices[0]));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try the coarsest granularity first, refining toward byte granularity.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
13787 
/// Visit a binary vector operation, like ADD.
/// Tries, in order: constant folding, converting a constant AND mask into a
/// shuffle-with-zero, and hoisting a binop above two identically-masked
/// single-input shuffles.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (SDValue Shuffle = XformToShuffleWithZero(N))
    return Shuffle;

  // Type legalization might introduce new shuffles in the DAG.
  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
      LHS.getOperand(1).isUndef() &&
      RHS.getOperand(1).isUndef()) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    // Hoisting is only valid when both shuffles apply the same permutation.
    if (SVN0->getMask().equals(SVN1->getMask())) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0),
                                     N->getFlags());
      AddUsersToWorklist(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  &SVN0->getMask()[0]);
    }
  }

  return SDValue();
}
13830 
/// Simplify a SELECT whose condition N0 is a SETCC by running the SELECT_CC
/// combines in SimplifySelectCC. If that yields a SELECT_CC node, split it
/// back into a SETCC + SELECT pair (this visitor was called with a SELECT);
/// otherwise return whatever simplified node came back.
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2){
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      // SELECT_CC operands: (lhs, rhs, true-val, false-val, cond-code).
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorklist(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                           SCC.getOperand(2), SCC.getOperand(3));
    }

    return SCC;
  }
  return SDValue();
}
13858 
13859 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
13860 /// being selected between, see if we can simplify the select.  Callers of this
13861 /// should assume that TheSelect is deleted if this returns true.  As such, they
13862 /// should return the appropriate thing (e.g. the node) back to the top-level of
13863 /// the DAG combiner loop to avoid it being looked at.
13864 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
13865                                     SDValue RHS) {
13866 
13867   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
13868   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
13869   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
13870     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
13871       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
13872       SDValue Sqrt = RHS;
13873       ISD::CondCode CC;
13874       SDValue CmpLHS;
13875       const ConstantFPSDNode *Zero = nullptr;
13876 
13877       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
13878         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
13879         CmpLHS = TheSelect->getOperand(0);
13880         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
13881       } else {
13882         // SELECT or VSELECT
13883         SDValue Cmp = TheSelect->getOperand(0);
13884         if (Cmp.getOpcode() == ISD::SETCC) {
13885           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
13886           CmpLHS = Cmp.getOperand(0);
13887           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
13888         }
13889       }
13890       if (Zero && Zero->isZero() &&
13891           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
13892           CC == ISD::SETULT || CC == ISD::SETLT)) {
13893         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
13894         CombineTo(TheSelect, Sqrt);
13895         return true;
13896       }
13897     }
13898   }
13899   // Cannot simplify select with vector condition
13900   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
13901 
13902   // If this is a select from two identical things, try to pull the operation
13903   // through the select.
13904   if (LHS.getOpcode() != RHS.getOpcode() ||
13905       !LHS.hasOneUse() || !RHS.hasOneUse())
13906     return false;
13907 
13908   // If this is a load and the token chain is identical, replace the select
13909   // of two loads with a load through a select of the address to load from.
13910   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
13911   // constants have been dropped into the constant pool.
13912   if (LHS.getOpcode() == ISD::LOAD) {
13913     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
13914     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
13915 
13916     // Token chains must be identical.
13917     if (LHS.getOperand(0) != RHS.getOperand(0) ||
13918         // Do not let this transformation reduce the number of volatile loads.
13919         LLD->isVolatile() || RLD->isVolatile() ||
13920         // FIXME: If either is a pre/post inc/dec load,
13921         // we'd need to split out the address adjustment.
13922         LLD->isIndexed() || RLD->isIndexed() ||
13923         // If this is an EXTLOAD, the VT's must match.
13924         LLD->getMemoryVT() != RLD->getMemoryVT() ||
13925         // If this is an EXTLOAD, the kind of extension must match.
13926         (LLD->getExtensionType() != RLD->getExtensionType() &&
13927          // The only exception is if one of the extensions is anyext.
13928          LLD->getExtensionType() != ISD::EXTLOAD &&
13929          RLD->getExtensionType() != ISD::EXTLOAD) ||
13930         // FIXME: this discards src value information.  This is
13931         // over-conservative. It would be beneficial to be able to remember
13932         // both potential memory locations.  Since we are discarding
13933         // src value info, don't do the transformation if the memory
13934         // locations are not in the default address space.
13935         LLD->getPointerInfo().getAddrSpace() != 0 ||
13936         RLD->getPointerInfo().getAddrSpace() != 0 ||
13937         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
13938                                       LLD->getBasePtr().getValueType()))
13939       return false;
13940 
13941     // Check that the select condition doesn't reach either load.  If so,
13942     // folding this will induce a cycle into the DAG.  If not, this is safe to
13943     // xform, so create a select of the addresses.
13944     SDValue Addr;
13945     if (TheSelect->getOpcode() == ISD::SELECT) {
13946       SDNode *CondNode = TheSelect->getOperand(0).getNode();
13947       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
13948           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
13949         return false;
13950       // The loads must not depend on one another.
13951       if (LLD->isPredecessorOf(RLD) ||
13952           RLD->isPredecessorOf(LLD))
13953         return false;
13954       Addr = DAG.getSelect(SDLoc(TheSelect),
13955                            LLD->getBasePtr().getValueType(),
13956                            TheSelect->getOperand(0), LLD->getBasePtr(),
13957                            RLD->getBasePtr());
13958     } else {  // Otherwise SELECT_CC
13959       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
13960       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
13961 
13962       if ((LLD->hasAnyUseOfValue(1) &&
13963            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
13964           (RLD->hasAnyUseOfValue(1) &&
13965            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
13966         return false;
13967 
13968       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
13969                          LLD->getBasePtr().getValueType(),
13970                          TheSelect->getOperand(0),
13971                          TheSelect->getOperand(1),
13972                          LLD->getBasePtr(), RLD->getBasePtr(),
13973                          TheSelect->getOperand(4));
13974     }
13975 
13976     SDValue Load;
13977     // It is safe to replace the two loads if they have different alignments,
13978     // but the new load must be the minimum (most restrictive) alignment of the
13979     // inputs.
13980     bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
13981     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
13982     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
13983       Load = DAG.getLoad(TheSelect->getValueType(0),
13984                          SDLoc(TheSelect),
13985                          // FIXME: Discards pointer and AA info.
13986                          LLD->getChain(), Addr, MachinePointerInfo(),
13987                          LLD->isVolatile(), LLD->isNonTemporal(),
13988                          isInvariant, Alignment);
13989     } else {
13990       Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
13991                             RLD->getExtensionType() : LLD->getExtensionType(),
13992                             SDLoc(TheSelect),
13993                             TheSelect->getValueType(0),
13994                             // FIXME: Discards pointer and AA info.
13995                             LLD->getChain(), Addr, MachinePointerInfo(),
13996                             LLD->getMemoryVT(), LLD->isVolatile(),
13997                             LLD->isNonTemporal(), isInvariant, Alignment);
13998     }
13999 
14000     // Users of the select now use the result of the load.
14001     CombineTo(TheSelect, Load);
14002 
14003     // Users of the old loads now use the new load's chain.  We know the
14004     // old-load value is dead now.
14005     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
14006     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
14007     return true;
14008   }
14009 
14010   return false;
14011 }
14012 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.  If NotExtCompare is true,
/// the caller does not want "select C, 1, 0" rewritten as a zero-extended
/// setcc (see the power-of-two fold below).  Returns a null SDValue when no
/// simplification applies.
SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3,
                                      ISD::CondCode CC, bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Element 0 holds the false value so that a zero select offset picks
        // it; element 1 holds the true value.
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            false, false, false, Alignment);
      }
    }

  // Check to see if we can perform the "gzip trick", transforming
  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
  if (isNullConstant(N3) && CC == ISD::SETLT &&
      (isNullConstant(N1) ||                 // (a < 0) ? b : 0
       (isOneConstant(N1) && N0 == N2))) {   // (a < 1) ? a : 0
    EVT XType = N0.getValueType();
    EVT AType = N2.getValueType();
    if (XType.bitsGE(AType)) {
      // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
      // single-bit constant.
      // (x & (x-1)) == 0 tests that at most one bit of A is set.
      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
        unsigned ShCtV = N2C->getAPIntValue().logBase2();
        ShCtV = XType.getSizeInBits() - ShCtV - 1;
        SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
                                       getShiftAmountTy(N0.getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
                                    XType, N0, ShCt);
        AddToWorklist(Shift.getNode());

        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
          AddToWorklist(Shift.getNode());
        }

        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
      }

      // General case: smear the sign bit across the whole value with an
      // arithmetic shift, producing all-ones or all-zeros for the AND mask.
      SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
                                  XType, N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1,
                                                  SDLoc(N0),
                                         getShiftAmountTy(N0.getValueType())));
      AddToWorklist(Shift.getNode());

      if (XType.bitsGT(AType)) {
        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
        AddToWorklist(Shift.getNode());
      }

      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
    }
  }

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      APInt AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    // SubC is the constant operand of the (sub C, X) that computes -X;
    // the abs pattern requires it to be zero.
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  return SDValue();
}
14260 
14261 /// This is a stub for TargetLowering::SimplifySetCC.
14262 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
14263                                    SDValue N1, ISD::CondCode Cond,
14264                                    SDLoc DL, bool foldBooleans) {
14265   TargetLowering::DAGCombinerInfo
14266     DagCombineInfo(DAG, Level, false, this);
14267   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
14268 }
14269 
14270 /// Given an ISD::SDIV node expressing a divide by constant, return
14271 /// a DAG expression to select that will generate the same value by multiplying
14272 /// by a magic number.
14273 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14274 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
14275   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14276   if (!C)
14277     return SDValue();
14278 
14279   // Avoid division by zero.
14280   if (C->isNullValue())
14281     return SDValue();
14282 
14283   std::vector<SDNode*> Built;
14284   SDValue S =
14285       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14286 
14287   for (SDNode *N : Built)
14288     AddToWorklist(N);
14289   return S;
14290 }
14291 
14292 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
14293 /// DAG expression that will generate the same value by right shifting.
14294 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
14295   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14296   if (!C)
14297     return SDValue();
14298 
14299   // Avoid division by zero.
14300   if (C->isNullValue())
14301     return SDValue();
14302 
14303   std::vector<SDNode *> Built;
14304   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
14305 
14306   for (SDNode *N : Built)
14307     AddToWorklist(N);
14308   return S;
14309 }
14310 
14311 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
14312 /// expression that will generate the same value by multiplying by a magic
14313 /// number.
14314 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14315 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
14316   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14317   if (!C)
14318     return SDValue();
14319 
14320   // Avoid division by zero.
14321   if (C->isNullValue())
14322     return SDValue();
14323 
14324   std::vector<SDNode*> Built;
14325   SDValue S =
14326       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14327 
14328   for (SDNode *N : Built)
14329     AddToWorklist(N);
14330   return S;
14331 }
14332 
14333 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
14334   if (Level >= AfterLegalizeDAG)
14335     return SDValue();
14336 
14337   // Expose the DAG combiner to the target combiner implementations.
14338   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
14339 
14340   unsigned Iterations = 0;
14341   if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
14342     if (Iterations) {
14343       // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14344       // For the reciprocal, we need to find the zero of the function:
14345       //   F(X) = A X - 1 [which has a zero at X = 1/A]
14346       //     =>
14347       //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
14348       //     does not require additional intermediate precision]
14349       EVT VT = Op.getValueType();
14350       SDLoc DL(Op);
14351       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14352 
14353       AddToWorklist(Est.getNode());
14354 
14355       // Newton iterations: Est = Est + Est (1 - Arg * Est)
14356       for (unsigned i = 0; i < Iterations; ++i) {
14357         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
14358         AddToWorklist(NewEst.getNode());
14359 
14360         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
14361         AddToWorklist(NewEst.getNode());
14362 
14363         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14364         AddToWorklist(NewEst.getNode());
14365 
14366         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
14367         AddToWorklist(Est.getNode());
14368       }
14369     }
14370     return Est;
14371   }
14372 
14373   return SDValue();
14374 }
14375 
14376 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14377 /// For the reciprocal sqrt, we need to find the zero of the function:
14378 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14379 ///     =>
14380 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
14381 /// As a result, we precompute A/2 prior to the iteration loop.
14382 SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
14383                                           unsigned Iterations,
14384                                           SDNodeFlags *Flags) {
14385   EVT VT = Arg.getValueType();
14386   SDLoc DL(Arg);
14387   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
14388 
14389   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
14390   // this entire sequence requires only one FP constant.
14391   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
14392   AddToWorklist(HalfArg.getNode());
14393 
14394   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
14395   AddToWorklist(HalfArg.getNode());
14396 
14397   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
14398   for (unsigned i = 0; i < Iterations; ++i) {
14399     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
14400     AddToWorklist(NewEst.getNode());
14401 
14402     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
14403     AddToWorklist(NewEst.getNode());
14404 
14405     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
14406     AddToWorklist(NewEst.getNode());
14407 
14408     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14409     AddToWorklist(Est.getNode());
14410   }
14411   return Est;
14412 }
14413 
14414 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14415 /// For the reciprocal sqrt, we need to find the zero of the function:
14416 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14417 ///     =>
14418 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
14419 SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
14420                                           unsigned Iterations,
14421                                           SDNodeFlags *Flags) {
14422   EVT VT = Arg.getValueType();
14423   SDLoc DL(Arg);
14424   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
14425   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
14426 
14427   // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
14428   for (unsigned i = 0; i < Iterations; ++i) {
14429     SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
14430     AddToWorklist(HalfEst.getNode());
14431 
14432     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
14433     AddToWorklist(Est.getNode());
14434 
14435     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
14436     AddToWorklist(Est.getNode());
14437 
14438     Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
14439     AddToWorklist(Est.getNode());
14440 
14441     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
14442     AddToWorklist(Est.getNode());
14443   }
14444   return Est;
14445 }
14446 
14447 SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
14448   if (Level >= AfterLegalizeDAG)
14449     return SDValue();
14450 
14451   // Expose the DAG combiner to the target combiner implementations.
14452   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
14453   unsigned Iterations = 0;
14454   bool UseOneConstNR = false;
14455   if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
14456     AddToWorklist(Est.getNode());
14457     if (Iterations) {
14458       Est = UseOneConstNR ?
14459         BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
14460         BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
14461     }
14462     return Est;
14463   }
14464 
14465   return SDValue();
14466 }
14467 
14468 /// Return true if base is a frame index, which is known not to alias with
14469 /// anything but itself.  Provides base object and offset as results.
14470 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
14471                            const GlobalValue *&GV, const void *&CV) {
14472   // Assume it is a primitive operation.
14473   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
14474 
14475   // If it's an adding a simple constant then integrate the offset.
14476   if (Base.getOpcode() == ISD::ADD) {
14477     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
14478       Base = Base.getOperand(0);
14479       Offset += C->getZExtValue();
14480     }
14481   }
14482 
14483   // Return the underlying GlobalValue, and update the Offset.  Return false
14484   // for GlobalAddressSDNode since the same GlobalAddress may be represented
14485   // by multiple nodes with different offsets.
14486   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
14487     GV = G->getGlobal();
14488     Offset += G->getOffset();
14489     return false;
14490   }
14491 
14492   // Return the underlying Constant value, and update the Offset.  Return false
14493   // for ConstantSDNodes since the same constant pool entry may be represented
14494   // by multiple nodes with different offsets.
14495   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
14496     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
14497                                          : (const void *)C->getConstVal();
14498     Offset += C->getOffset();
14499     return false;
14500   }
14501   // If it's any of the following then it can't alias with anything but itself.
14502   return isa<FrameIndexSDNode>(Base);
14503 }
14504 
14505 /// Return true if there is any possibility that the two addresses overlap.
14506 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
14507   // If they are the same then they must be aliases.
14508   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
14509 
14510   // If they are both volatile then they cannot be reordered.
14511   if (Op0->isVolatile() && Op1->isVolatile()) return true;
14512 
14513   // If one operation reads from invariant memory, and the other may store, they
14514   // cannot alias. These should really be checking the equivalent of mayWrite,
14515   // but it only matters for memory nodes other than load /store.
14516   if (Op0->isInvariant() && Op1->writeMem())
14517     return false;
14518 
14519   if (Op1->isInvariant() && Op0->writeMem())
14520     return false;
14521 
14522   // Gather base node and offset information.
14523   SDValue Base1, Base2;
14524   int64_t Offset1, Offset2;
14525   const GlobalValue *GV1, *GV2;
14526   const void *CV1, *CV2;
14527   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
14528                                       Base1, Offset1, GV1, CV1);
14529   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
14530                                       Base2, Offset2, GV2, CV2);
14531 
14532   // If they have a same base address then check to see if they overlap.
14533   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
14534     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
14535              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
14536 
14537   // It is possible for different frame indices to alias each other, mostly
14538   // when tail call optimization reuses return address slots for arguments.
14539   // To catch this case, look up the actual index of frame indices to compute
14540   // the real alias relationship.
14541   if (isFrameIndex1 && isFrameIndex2) {
14542     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
14543     Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
14544     Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
14545     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
14546              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
14547   }
14548 
14549   // Otherwise, if we know what the bases are, and they aren't identical, then
14550   // we know they cannot alias.
14551   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
14552     return false;
14553 
14554   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
14555   // compared to the size and offset of the access, we may be able to prove they
14556   // do not alias.  This check is conservative for now to catch cases created by
14557   // splitting vector types.
14558   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
14559       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
14560       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
14561        Op1->getMemoryVT().getSizeInBits() >> 3) &&
14562       (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) {
14563     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
14564     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
14565 
14566     // There is no overlap between these relatively aligned accesses of similar
14567     // size, return no alias.
14568     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
14569         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
14570       return false;
14571   }
14572 
14573   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
14574                    ? CombinerGlobalAA
14575                    : DAG.getSubtarget().useAA();
14576 #ifndef NDEBUG
14577   if (CombinerAAOnlyFunc.getNumOccurrences() &&
14578       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
14579     UseAA = false;
14580 #endif
14581   if (UseAA &&
14582       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
14583     // Use alias analysis information.
14584     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
14585                                  Op1->getSrcValueOffset());
14586     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
14587         Op0->getSrcValueOffset() - MinOffset;
14588     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
14589         Op1->getSrcValueOffset() - MinOffset;
14590     AliasResult AAResult =
14591         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
14592                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
14593                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
14594                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
14595     if (AAResult == NoAlias)
14596       return false;
14597   }
14598 
14599   // Otherwise we have to assume they alias.
14600   return true;
14601 }
14602 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
/// On the bail-out paths (depth limit exceeded, or a possible hidden data
/// dependence through a value operand), Aliases is reset to contain only
/// OriginalChain, which conservatively preserves the existing ordering.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.  A non-volatile load needs no ordering
  // against other loads, which lets the load/load case below be skipped.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.  Two non-volatile loads (IsLoad &&
      // IsOpLoad) never need ordering, so that pair skips the isAlias query.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }

  // We need to be careful here to also search for aliases through the
  // value operand of a store, etc. Consider the following situation:
  //   Token1 = ...
  //   L1 = load Token1, %52
  //   S1 = store Token1, L1, %51
  //   L2 = load Token1, %52+8
  //   S2 = store Token1, L2, %51+8
  //   Token2 = Token(S1, S2)
  //   L3 = load Token2, %53
  //   S3 = store Token2, L3, %52
  //   L4 = load Token2, %53+8
  //   S4 = store Token2, L4, %52+8
  // If we search for aliases of S3 (which loads address %52), and we look
  // only through the chain, then we'll miss the trivial dependence on L1
  // (which also loads from %52). We then might change all loads and
  // stores to use Token1 as their chain operand, which could result in
  // copying %53 into %52 before copying %52 into %51 (which should
  // happen first).
  //
  // The problem is, however, that searching for such data dependencies
  // can become expensive, and the cost is not directly related to the
  // chain depth. Instead, we'll rule out such configurations here by
  // insisting that we've visited all chain users (except for users
  // of the original chain, which is not necessary). When doing this,
  // we need to look through nodes we don't care about (otherwise, things
  // like register copies will interfere with trivial cases).

  // Seed the worklist with every chain node visited above, except the
  // original chain itself (per the comment above, its users don't matter).
  SmallVector<const SDNode *, 16> Worklist;
  for (const SDNode *N : Visited)
    if (N != OriginalChain.getNode())
      Worklist.push_back(N);

  while (!Worklist.empty()) {
    const SDNode *M = Worklist.pop_back_val();

    // We have already visited M, and want to make sure we've visited any uses
    // of M that we care about. For uses that we've not visisted, and don't
    // care about, queue them to the worklist.

    // Only uses of a node's chain result (MVT::Other) can carry an ordering
    // dependence, so all other uses are ignored.
    for (SDNode::use_iterator UI = M->use_begin(),
         UIE = M->use_end(); UI != UIE; ++UI)
      if (UI.getUse().getValueType() == MVT::Other &&
          Visited.insert(*UI).second) {
        if (isa<MemSDNode>(*UI)) {
          // We've not visited this use, and we care about it (it could have an
          // ordering dependency with the original node).
          Aliases.clear();
          Aliases.push_back(OriginalChain);
          return;
        }

        // We've not visited this use, but we don't care about it. Mark it as
        // visited and enqueue it to the worklist.
        Worklist.push_back(*UI);
      }
  }
}
14740 
14741 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
14742 /// (aliasing node.)
14743 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
14744   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
14745 
14746   // Accumulate all the aliases to this node.
14747   GatherAllAliases(N, OldChain, Aliases);
14748 
14749   // If no operands then chain to entry token.
14750   if (Aliases.size() == 0)
14751     return DAG.getEntryNode();
14752 
14753   // If a single operand then chain to it.  We don't need to revisit it.
14754   if (Aliases.size() == 1)
14755     return Aliases[0];
14756 
14757   // Construct a custom tailored token factor.
14758   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
14759 }
14760 
/// Starting from St, walk up the chain collecting consecutive non-volatile,
/// non-indexed stores to the same base pointer, then try to give each of them
/// a "better" (shorter, non-aliasing) chain via FindBetterChain.
/// Returns true if any store's chain was improved.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);

  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.isUndef())
    return false;

  // Candidate stores, starting with St itself.
  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Volatile and indexed stores must keep their place in the chain.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);

    // Check that the base pointer is the same as the original one.
    if (!Ptr.equalBaseIndex(BasePtr))
      break;

    // Find the next memory operand in the chain. If the next operand in the
    // chain is a store then move up and continue the scan with the next
    // memory operand. If the next operand is a load save it and use alias
    // information to check if it interferes with anything.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          // Setting Index to nullptr terminates the outer loop as well.
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        // Look through loads and keep scanning up their chain.
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Any other kind of node ends the scan.
        Index = nullptr;
        break;
      }
    }
  }

  bool MadeChange = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  // Query FindBetterChain for each collected store, recording only the ones
  // whose chain actually improves.
  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      MadeChange = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChange;
}
14841 
14842 /// This is the entry point for the file.
14843 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
14844                            CodeGenOpt::Level OptLevel) {
14845   /// This is the main entry point to this class.
14846   DAGCombiner(*this, AA, OptLevel).Run(Level);
14847 }
14848