1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallBitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Target/TargetLowering.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include "llvm/Target/TargetRegisterInfo.h"
40 #include "llvm/Target/TargetSubtargetInfo.h"
41 #include <algorithm>
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "dagcombine"
45 
46 STATISTIC(NodesCombined   , "Number of dag nodes combined");
47 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
48 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
49 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
50 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
51 STATISTIC(SlicedLoads, "Number of load sliced");
52 
53 namespace {
  // Hidden (developer-only) command-line knobs controlling the combiner's
  // alias-analysis features and load-slicing behavior. See each cl::desc for
  // the user-visible description.

  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner alias-analysis heuristics"));

  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  // Defaults to true; pass -combiner-use-tbaa=false to disable.
  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  // Debug-build-only: restrict combiner alias analysis to a single function,
  // useful when bisecting a miscompile.
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  // Defaults to true; controls SplitIndexingFromLoad-style transforms.
  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));
84 
85 //------------------------------ DAGCombiner ---------------------------------//
86 
  /// The core DAG combiner. It maintains a worklist of nodes, applies the
  /// per-opcode visit* folds below (plus target-specific combines, via
  /// combine()) to each node, and replaces nodes in the DAG until the
  /// worklist is exhausted. Driven by Run().
  class DAGCombiner {
    SelectionDAG &DAG;            // The DAG being combined.
    const TargetLowering &TLI;    // Target hooks (taken from DAG in the ctor).
    CombineLevel Level;           // Initialized to BeforeLegalizeTypes in the
                                  // ctor; presumably updated by Run() — confirm.
    CodeGenOpt::Level OptLevel;   // Optimization level supplied at construction.
    bool LegalOperations;         // False at construction; NOTE(review):
    bool LegalTypes;              // presumably set when combining post-legalize.
    bool ForCodeSize;             // Set from Function::optForSize() in the ctor.

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // insert() returns false if N is already present, preserving the
      // no-duplicates invariant documented on Worklist.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      // Demand every bit of the scalar element type.
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    // Integer type-promotion helpers (promote to the wider type PVT).
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
                         SDValue ExtLoad, const SDLoc &DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    // FMA-formation helpers used by the FADD/FSUB/FMUL visitors.
    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMACombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                           SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    // Reciprocal / reciprocal-sqrt estimate builders (Newton-Raphson
    // refinement with one- or two-constant variants).
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags *Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags *Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
      // What is the sequence number of this mem node.
      // Lowest mem operand in the DAG starts at zero.
      unsigned SequenceNum;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    /// constant build_vector of the stored constant values in Stores.
    SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
                                         ArrayRef<MemOpLink> Stores,
                                         SmallVectorImpl<SDValue> &Chains,
                                         EVT Ty) const;

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// This is a helper function for MergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store.
    /// \return True if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector);

    /// This is a helper function for MergeConsecutiveStores.
    /// Stores that may be merged are placed in StoreNodes.
    /// Loads that may alias with those stores are placed in AliasLoadNodes.
    void getStoreMergeAndAliasCandidates(
        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// Candidate stores have indirect dependency through their
    /// operands. \return True if safe to merge
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      // Post-type-legalization, use the target's scalar shift-amount type;
      // before that, pointer-width is always wide enough.
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
515 }
516 
517 
518 namespace {
519 /// This class is a DAGUpdateListener that removes any deleted
520 /// nodes from the worklist.
521 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
522   DAGCombiner &DC;
523 public:
524   explicit WorklistRemover(DAGCombiner &dc)
525     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
526 
527   void NodeDeleted(SDNode *N, SDNode *E) override {
528     DC.removeFromWorklist(N);
529   }
530 };
531 }
532 
533 //===----------------------------------------------------------------------===//
534 //  TargetLowering::DAGCombinerInfo implementation
535 //===----------------------------------------------------------------------===//
536 
537 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
538   ((DAGCombiner*)DC)->AddToWorklist(N);
539 }
540 
541 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
542   ((DAGCombiner*)DC)->removeFromWorklist(N);
543 }
544 
545 SDValue TargetLowering::DAGCombinerInfo::
546 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
547   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
548 }
549 
550 SDValue TargetLowering::DAGCombinerInfo::
551 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
552   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
553 }
554 
555 
556 SDValue TargetLowering::DAGCombinerInfo::
557 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
558   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
559 }
560 
561 void TargetLowering::DAGCombinerInfo::
562 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
563   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
564 }
565 
566 //===----------------------------------------------------------------------===//
567 // Helper Functions
568 //===----------------------------------------------------------------------===//
569 
570 void DAGCombiner::deleteAndRecombine(SDNode *N) {
571   removeFromWorklist(N);
572 
573   // If the operands of this node are only used by the node, they will now be
574   // dead. Make sure to re-visit them and recursively delete dead nodes.
575   for (const SDValue &Op : N->ops())
576     // For an operand generating multiple values, one of the values may
577     // become dead allowing further simplification (e.g. split index
578     // arithmetic from an indexed load).
579     if (Op->hasOneUse() || Op->getNumValues() > 1)
580       AddToWorklist(Op.getNode());
581 
582   DAG.DeleteNode(N);
583 }
584 
585 /// Return 1 if we can compute the negated form of the specified expression for
586 /// the same cost as the expression itself, or 2 if we can compute the negated
587 /// form more cheaply than the expression itself.
588 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
589                                const TargetLowering &TLI,
590                                const TargetOptions *Options,
591                                unsigned Depth = 0) {
592   // fneg is removable even if it has multiple uses.
593   if (Op.getOpcode() == ISD::FNEG) return 2;
594 
595   // Don't allow anything with multiple uses.
596   if (!Op.hasOneUse()) return 0;
597 
598   // Don't recurse exponentially.
599   if (Depth > 6) return 0;
600 
601   switch (Op.getOpcode()) {
602   default: return false;
603   case ISD::ConstantFP:
604     // Don't invert constant FP values after legalize.  The negated constant
605     // isn't necessarily legal.
606     return LegalOperations ? 0 : 1;
607   case ISD::FADD:
608     // FIXME: determine better conditions for this xform.
609     if (!Options->UnsafeFPMath) return 0;
610 
611     // After operation legalization, it might not be legal to create new FSUBs.
612     if (LegalOperations &&
613         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
614       return 0;
615 
616     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
617     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
618                                     Options, Depth + 1))
619       return V;
620     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
621     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
622                               Depth + 1);
623   case ISD::FSUB:
624     // We can't turn -(A-B) into B-A when we honor signed zeros.
625     if (!Options->UnsafeFPMath) return 0;
626 
627     // fold (fneg (fsub A, B)) -> (fsub B, A)
628     return 1;
629 
630   case ISD::FMUL:
631   case ISD::FDIV:
632     if (Options->HonorSignDependentRoundingFPMath()) return 0;
633 
634     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
635     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
636                                     Options, Depth + 1))
637       return V;
638 
639     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
640                               Depth + 1);
641 
642   case ISD::FP_EXTEND:
643   case ISD::FP_ROUND:
644   case ISD::FSIN:
645     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
646                               Depth + 1);
647   }
648 }
649 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// This must be kept in lockstep with isNegatibleForFree: the asserts below
/// encode the assumption that isNegatibleForFree already accepted Op with the
/// same LegalOperations/Depth.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Carry the original node's flags over to the rebuilt nodes.
  const SDNodeFlags *Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly by flipping its sign bit.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    // Prefer negating whichever operand isNegatibleForFree says is free.
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes with these unary ops: negate the operand instead.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    // FP_ROUND has a second (truncation-exactness) operand to preserve.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
730 
731 // APInts must be the same size for most operations, this helper
732 // function zero extends the shorter of the pair so that they match.
733 // We provide an Offset so that we can create bitwidths that won't overflow.
734 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
735   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
736   LHS = LHS.zextOrSelf(Bits);
737   RHS = RHS.zextOrSelf(Bits);
738 }
739 
740 // Return true if this node is a setcc, or is a select_cc
741 // that selects between the target values used for true and false, making it
742 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
743 // the appropriate nodes based on the type of node we are checking. This
744 // simplifies life a bit for the callers.
745 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
746                                     SDValue &CC) const {
747   if (N.getOpcode() == ISD::SETCC) {
748     LHS = N.getOperand(0);
749     RHS = N.getOperand(1);
750     CC  = N.getOperand(2);
751     return true;
752   }
753 
754   if (N.getOpcode() != ISD::SELECT_CC ||
755       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
756       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
757     return false;
758 
759   if (TLI.getBooleanContents(N.getValueType()) ==
760       TargetLowering::UndefinedBooleanContent)
761     return false;
762 
763   LHS = N.getOperand(0);
764   RHS = N.getOperand(1);
765   CC  = N.getOperand(4);
766   return true;
767 }
768 
769 /// Return true if this is a SetCC-equivalent operation with only one use.
770 /// If this is true, it allows the users to invert the operation for free when
771 /// it is profitable to do so.
772 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
773   SDValue N0, N1, N2;
774   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
775     return true;
776   return false;
777 }
778 
779 // \brief Returns the SDNode if it is a constant float BuildVector
780 // or constant float.
781 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
782   if (isa<ConstantFPSDNode>(N))
783     return N.getNode();
784   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
785     return N.getNode();
786   return nullptr;
787 }
788 
789 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
790 // int.
791 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
792   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
793     return CN;
794 
795   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
796     BitVector UndefElements;
797     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
798 
799     // BuildVectors can truncate their operands. Ignore that case here.
800     // FIXME: We blindly ignore splats which include undef which is overly
801     // pessimistic.
802     if (CN && UndefElements.none() &&
803         CN->getValueType(0) == N.getValueType().getScalarType())
804       return CN;
805   }
806 
807   return nullptr;
808 }
809 
810 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
811 // float.
812 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
813   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
814     return CN;
815 
816   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
817     BitVector UndefElements;
818     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
819 
820     if (CN && UndefElements.none())
821       return CN;
822   }
823 
824   return nullptr;
825 }
826 
827 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
828                                     SDValue N1) {
829   EVT VT = N0.getValueType();
830   if (N0.getOpcode() == Opc) {
831     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
832       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
833         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
834         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
835           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
836         return SDValue();
837       }
838       if (N0.hasOneUse()) {
839         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
840         // use
841         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
842         if (!OpNode.getNode())
843           return SDValue();
844         AddToWorklist(OpNode.getNode());
845         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
846       }
847     }
848   }
849 
850   if (N1.getOpcode() == Opc) {
851     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
852       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
853         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
854         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
855           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
856         return SDValue();
857       }
858       if (N1.hasOneUse()) {
859         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
860         // use
861         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
862         if (!OpNode.getNode())
863           return SDValue();
864         AddToWorklist(OpNode.getNode());
865         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
866       }
867     }
868   }
869 
870   return SDValue();
871 }
872 
873 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
874                                bool AddTo) {
875   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
876   ++NodesCombined;
877   DEBUG(dbgs() << "\nReplacing.1 ";
878         N->dump(&DAG);
879         dbgs() << "\nWith: ";
880         To[0].getNode()->dump(&DAG);
881         dbgs() << " and " << NumTo-1 << " other values\n");
882   for (unsigned i = 0, e = NumTo; i != e; ++i)
883     assert((!To[i].getNode() ||
884             N->getValueType(i) == To[i].getValueType()) &&
885            "Cannot combine value to value of different type!");
886 
887   WorklistRemover DeadNodes(*this);
888   DAG.ReplaceAllUsesWith(N, To);
889   if (AddTo) {
890     // Push the new nodes and any users onto the worklist
891     for (unsigned i = 0, e = NumTo; i != e; ++i) {
892       if (To[i].getNode()) {
893         AddToWorklist(To[i].getNode());
894         AddUsersToWorklist(To[i].getNode());
895       }
896     }
897   }
898 
899   // Finally, if the node is now dead, remove it from the graph.  The node
900   // may not be dead if the replacement process recursively simplified to
901   // something else needing this node.
902   if (N->use_empty())
903     deleteAndRecombine(N);
904   return SDValue(N, 0);
905 }
906 
/// Apply a replacement (TLO.Old -> TLO.New) that was computed by a
/// TargetLowering simplification, keeping the combiner worklist in sync.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
924 
925 /// Check the specified integer node value to see if it can be simplified or if
926 /// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  // Ask TargetLowering whether Op can be simplified given that only the
  // Demanded bits of its result are used. On success TLO records the
  // replacement (TLO.Old -> TLO.New), which we commit below.
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
947 
/// Replace a load with its promoted (wider) counterpart: uses of the loaded
/// value see a TRUNCATE of the extended load's result, and uses of the chain
/// are rewired to the extended load's chain. The original load is deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  // Narrow the promoted result back to the original type for existing users.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Value 0 is the loaded data, value 1 is the chain.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
964 
/// Produce an equivalent of \p Op whose result type is the promoted type
/// \p PVT. \p Replace is set to true when a replacement extending load was
/// created and the caller must replace the original load with it.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes a zero-extending load if that is legal
    // for PVT (any-extending otherwise); an existing extload keeps its
    // extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Keep the assertion: sign-extend the asserted operand to PVT.
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    // Keep the assertion: zero-extend the asserted operand to PVT.
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    // NOTE(review): the extension kind is chosen on isByteSized();
    // presumably sign extension is preferred for byte-sized constants —
    // confirm the rationale before changing.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  // Fallback: any-extend the value, when that is legal for PVT.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}
1003 
1004 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1005   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1006     return SDValue();
1007   EVT OldVT = Op.getValueType();
1008   SDLoc dl(Op);
1009   bool Replace = false;
1010   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1011   if (!NewOp.getNode())
1012     return SDValue();
1013   AddToWorklist(NewOp.getNode());
1014 
1015   if (Replace)
1016     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1017   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
1018                      DAG.getValueType(OldVT));
1019 }
1020 
1021 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1022   EVT OldVT = Op.getValueType();
1023   SDLoc dl(Op);
1024   bool Replace = false;
1025   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1026   if (!NewOp.getNode())
1027     return SDValue();
1028   AddToWorklist(NewOp.getNode());
1029 
1030   if (Replace)
1031     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1032   return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
1033 }
1034 
1035 /// Promote the specified integer binary operation if the target indicates it is
1036 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1037 /// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion is only attempted once types/operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Promote both operands to PVT; bail out if either fails.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (!NN0.getNode())
      return SDValue();

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    // If both operands are the same node, reuse the single promoted value.
    if (N0 == N1)
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (!NN1.getNode())
        return SDValue();
    }

    AddToWorklist(NN0.getNode());
    if (NN1.getNode())
      AddToWorklist(NN1.getNode());

    // If either operand was a load, swap in the promoted load now that both
    // promotions have succeeded.
    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Compute in the wider type, then truncate back to the original type.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}
1092 
1093 /// Promote the specified integer shift operation if the target indicates it is
1094 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1095 /// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  // Promotion is only attempted once types/operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Only the value being shifted is promoted; the amount operand is kept.
    // SRA needs the sign bits preserved, SRL needs the high bits cleared;
    // SHL can use any-extension since the low bits are unchanged.
    // NOTE(review): Replace stays false on the SRA/SRL paths because
    // S/ZExtPromoteOperand perform the load replacement internally —
    // confirm before restructuring.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (!N0.getNode())
      return SDValue();

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Shift in the wider type, then truncate back to the original type.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}
1139 
1140 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1141   if (!LegalOperations)
1142     return SDValue();
1143 
1144   EVT VT = Op.getValueType();
1145   if (VT.isVector() || !VT.isInteger())
1146     return SDValue();
1147 
1148   // If operation type is 'undesirable', e.g. i16 on x86, consider
1149   // promoting it.
1150   unsigned Opc = Op.getOpcode();
1151   if (TLI.isTypeDesirableForOp(Opc, VT))
1152     return SDValue();
1153 
1154   EVT PVT = VT;
1155   // Consult target whether it is a good idea to promote this operation and
1156   // what's the right type to promote it to.
1157   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1158     assert(PVT != VT && "Don't know what type to promote to!");
1159     // fold (aext (aext x)) -> (aext x)
1160     // fold (aext (zext x)) -> (zext x)
1161     // fold (aext (sext x)) -> (sext x)
1162     DEBUG(dbgs() << "\nPromoting ";
1163           Op.getNode()->dump(&DAG));
1164     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1165   }
1166   return SDValue();
1167 }
1168 
/// Promote a load to a wider type when the target says the current type is
/// undesirable. Returns true if the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  // Promotion is only attempted once types/operations have been legalized.
  if (!LegalOperations)
    return false;

  // Only plain (unindexed) loads are handled.
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  // Only scalar integer loads are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc dl(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes a zero-extending load if that is legal
    // for PVT (any-extending otherwise); an existing extload keeps its
    // extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    // Existing users get a truncate of the wider loaded value.
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Value 0 is the loaded data, value 1 is the chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1219 
1220 /// \brief Recursively delete a node which has no uses and any operands for
1221 /// which it is the only use.
1222 ///
1223 /// Note that this both deletes the nodes and removes them from the worklist.
1224 /// It also adds any nodes who have had a user deleted to the worklist as they
1225 /// may now have only one use and subject to other combines.
1226 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1227   if (!N->use_empty())
1228     return false;
1229 
1230   SmallSetVector<SDNode *, 16> Nodes;
1231   Nodes.insert(N);
1232   do {
1233     N = Nodes.pop_back_val();
1234     if (!N)
1235       continue;
1236 
1237     if (N->use_empty()) {
1238       for (const SDValue &ChildN : N->op_values())
1239         Nodes.insert(ChildN.getNode());
1240 
1241       removeFromWorklist(N);
1242       DAG.DeleteNode(N);
1243     } else {
1244       AddToWorklist(N);
1245     }
1246   } while (!Nodes.empty());
1247   return true;
1248 }
1249 
1250 //===----------------------------------------------------------------------===//
1251 //  Main DAG Combiner implementation
1252 //===----------------------------------------------------------------------===//
1253 
/// Main combiner driver: repeatedly pop nodes off the worklist, try to
/// combine each one, and commit replacements until the worklist empties.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced N entirely; nothing left to combine here.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // Null means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Replace N's results with RV's; the multi-result form requires matching
    // value counts, otherwise N must be a single-value node.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1359 
/// Dispatch \p N to the opcode-specific visit routine. Returns the
/// replacement value, or a null SDValue when no combine applies.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1458 
/// Try the generic visit routines, then target-specific combines, then type
/// promotion, then commutative CSE; returns the replacement or null SDValue.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad replaces N in place, so signal success by returning N.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}
1526 
1527 /// Given a node, return its input chain if it has one, otherwise return a null
1528 /// sd operand.
1529 static SDValue getInputChainForNode(SDNode *N) {
1530   if (unsigned NumOps = N->getNumOperands()) {
1531     if (N->getOperand(0).getValueType() == MVT::Other)
1532       return N->getOperand(0);
1533     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1534       return N->getOperand(NumOps-1);
1535     for (unsigned i = 1; i < NumOps-1; ++i)
1536       if (N->getOperand(i).getValueType() == MVT::Other)
1537         return N->getOperand(i);
1538   }
1539   return SDValue();
1540 }
1541 
// Flatten and simplify a TokenFactor node: remove entry tokens and duplicate
// operands, and merge single-use nested TokenFactors into this one.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Dedups operands across all visited TFs.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.  NOTE: indexed iteration is deliberate — push_back below may
  // reallocate TFs while we are walking it, so iterators would be invalidated.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        // Only merge a nested TokenFactor when this TF is its sole user;
        // otherwise inlining its operands would duplicate the chain for the
        // other users.  Also guard against visiting the same TF twice.
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Add users to worklist if AA is enabled, since it may introduce
    // a lot of new chained token factors while removing memory deps.
    bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
      : DAG.getSubtarget().useAA();
    return CombineTo(N, Result, UseAA /*add to worklist*/);
  }

  // Nothing changed: Result is a null SDValue, meaning "no combine happened".
  return Result;
}
1618 
/// MERGE_VALUES can always be eliminated: each result value i is simply
/// forwarded to the node's operand i.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Forward every result value to the corresponding operand.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}
1635 
1636 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1637 /// ConstantSDNode pointer else nullptr.
1638 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1639   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1640   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1641 }
1642 
// Combine an ISD::ADD node.  Folds are attempted in order; earlier, simpler
// canonicalizations deliberately run before the more structural rewrites.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());
  }
  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold ((c1-A)+c2) -> (c1+c2)-A
  // Only non-opaque constants participate, since opaque constants must not
  // be folded.
  if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
    if (N0.getOpcode() == ISD::SUB)
      if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
        SDLoc DL(N);
        return DAG.getNode(ISD::SUB, DL, VT,
                           DAG.getConstant(N1C->getAPIntValue()+
                                           N0C->getAPIntValue(), DL, VT),
                           N0.getOperand(1));
      }
  }
  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // Try to simplify based on which bits of the result are actually demanded
  // by N's users (scalar only).
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));
  // Same fold with the shift on the LHS instead.
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N0.getOperand(0).getOperand(1),
                                   N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-zeros or all-ones, so
    // (and AndOp0, 1) is 0 or 1 and adding it equals subtracting AndOp0.
    if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  // (sext i1) is 0 or -1, so adding it equals subtracting the zext (0 or 1).
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
1789 
// Combine an ISD::ADDC node (add producing a carry-out flag as result 1).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  // CombineTo replaces both results: value 0 with the plain ADD and value 1
  // with CARRY_FALSE.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  // computeKnownBits fills masks of bits known to be zero/one in each operand.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    // (The two disjuncts are logically equivalent; both state that every bit
    // position is known-zero in at least one operand, so no carry can occur.)
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}
1830 
1831 SDValue DAGCombiner::visitADDE(SDNode *N) {
1832   SDValue N0 = N->getOperand(0);
1833   SDValue N1 = N->getOperand(1);
1834   SDValue CarryIn = N->getOperand(2);
1835 
1836   // canonicalize constant to RHS
1837   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1838   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1839   if (N0C && !N1C)
1840     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1841                        N1, N0, CarryIn);
1842 
1843   // fold (adde x, y, false) -> (addc x, y)
1844   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1845     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1846 
1847   return SDValue();
1848 }
1849 
1850 // Since it may not be valid to emit a fold to zero for vector initializers
1851 // check if we can before folding.
1852 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
1853                              SelectionDAG &DAG, bool LegalOperations,
1854                              bool LegalTypes) {
1855   if (!VT.isVector())
1856     return DAG.getConstant(0, DL, VT);
1857   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1858     return DAG.getConstant(0, DL, VT);
1859   return SDValue();
1860 }
1861 
// Combine an ISD::SUB node.  Folds are attempted in order; earlier
// canonicalizations deliberately run before the structural rewrites.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());
  }
  // Opaque constants are excluded below, since they must not be folded.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstant(N0))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  // N1C1 is the constant C1 on the RHS of an add, if present.
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDLoc DL(N);
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 SDLoc(N), VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // (sextinreg Y i1) is 0 or -1, so subtracting it equals adding (Y & 1).
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
1971 
1972 SDValue DAGCombiner::visitSUBC(SDNode *N) {
1973   SDValue N0 = N->getOperand(0);
1974   SDValue N1 = N->getOperand(1);
1975   EVT VT = N0.getValueType();
1976   SDLoc DL(N);
1977 
1978   // If the flag result is dead, turn this into an SUB.
1979   if (!N->hasAnyUseOfValue(1))
1980     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
1981                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1982 
1983   // fold (subc x, x) -> 0 + no borrow
1984   if (N0 == N1)
1985     return CombineTo(N, DAG.getConstant(0, DL, VT),
1986                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1987 
1988   // fold (subc x, 0) -> x + no borrow
1989   if (isNullConstant(N1))
1990     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1991 
1992   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
1993   if (isAllOnesConstant(N0))
1994     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
1995                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1996 
1997   return SDValue();
1998 }
1999 
2000 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2001   SDValue N0 = N->getOperand(0);
2002   SDValue N1 = N->getOperand(1);
2003   SDValue CarryIn = N->getOperand(2);
2004 
2005   // fold (sube x, y, false) -> (subc x, y)
2006   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2007     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2008 
2009   return SDValue();
2010 }
2011 
// Combine an ISD::MUL node.  Constant detection handles both scalar
// constants and vector constant splats, tracking opaqueness for scalars.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, a "constant" operand means a splat of one value.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  // Opaque constants are deliberately left unfolded.
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);
    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  // Only done when the target deems the distributed form profitable.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}
2142 
2143 /// Return true if divmod libcall is available.
2144 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2145                                      const TargetLowering &TLI) {
2146   RTLIB::Libcall LC;
2147   EVT NodeType = Node->getValueType(0);
2148   if (!NodeType.isSimple())
2149     return false;
2150   switch (NodeType.getSimpleVT().SimpleTy) {
2151   default: return false; // No libcall for vector types.
2152   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2153   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2154   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2155   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2156   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2157   }
2158 
2159   return TLI.getLibcallName(LC) != nullptr;
2160 }
2161 
/// Issue divrem if both quotient and remainder are needed.
/// Given a DIV/REM node, look for sibling uses of the same operands that
/// compute the complementary result and rewrite them all to share a single
/// DIVREM node.  Returns the combined node (value 0 = quotient,
/// value 1 = remainder) or a null SDValue if no combine was done.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  // OtherOpcode is the complementary operation (div <-> rem) we search for.
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Scan the users of the first operand for nodes computing div, rem, or
  // divrem on the same operand pair, and fold them all into one DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Create (or reuse) the shared DIVREM node lazily, on the first
        // matching user.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Redirect div users to result 0 and rem users to result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2231 
/// Combine an ISD::SDIV node: constant folding, strength reduction to UDIV or
/// to shift sequences for power-of-two divisors, target-provided expansions,
/// and merging with a matching SREM into SDIVREM.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // lg2 is the shift amount for the (possibly negated) power-of-two divisor.
    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0; this biases negative dividends so the
    // arithmetic shift below rounds toward zero, matching sdiv semantics.
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  // undef / X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
2330 
/// Combine an ISD::UDIV node: constant folding, reduction to logical shifts
/// for power-of-two divisors, target-provided expansions, and merging with a
/// matching UREM into UDIVREM.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  DL, ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
      }
    }
  }

  // fold (udiv x, c) -> alternate (multiply-by-magic-constant) sequence when
  // division is expensive on the target.
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  // undef / X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
2394 
/// Combine an ISD::SREM or ISD::UREM node: constant folding, strength
/// reduction (SREM->UREM, UREM->AND for power-of-two divisors), rewriting
/// X%C as X-X/C*C when the division simplifies, and DIVREM merging.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (!VT.isVector()) {
      if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
        return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
    }
  } else {
    // fold (urem x, pow2) -> (and x, pow2-1)
    if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
        N1C->getAPIntValue().isPowerOf2()) {
      return DAG.getNode(ISD::AND, DL, VT, N0,
                         DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
    }
    // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    if (N1.getOpcode() == ISD::SHL) {
      ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
      if (SHC && SHC->getAPIntValue().isPowerOf2()) {
        // NOTE(review): the all-ones constant is built with the full VT bit
        // width; if this path can be reached with a vector VT, getSizeInBits()
        // is the whole-vector width rather than the element width -- confirm
        // this path is scalar-only.
        APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
        SDValue Add =
            DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::AND, DL, VT, N0, Add);
      }
    }
  }

  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    // Only rewrite if combine() actually simplified the speculative divide.
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem / udiv, urem -> udivrem; the remainder is result 1.
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X % undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
2476 
2477 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2478   SDValue N0 = N->getOperand(0);
2479   SDValue N1 = N->getOperand(1);
2480   EVT VT = N->getValueType(0);
2481   SDLoc DL(N);
2482 
2483   // fold (mulhs x, 0) -> 0
2484   if (isNullConstant(N1))
2485     return N1;
2486   // fold (mulhs x, 1) -> (sra x, size(x)-1)
2487   if (isOneConstant(N1)) {
2488     SDLoc DL(N);
2489     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2490                        DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
2491                                        DL,
2492                                        getShiftAmountTy(N0.getValueType())));
2493   }
2494   // fold (mulhs x, undef) -> 0
2495   if (N0.isUndef() || N1.isUndef())
2496     return DAG.getConstant(0, SDLoc(N), VT);
2497 
2498   // If the type twice as wide is legal, transform the mulhs to a wider multiply
2499   // plus a shift.
2500   if (VT.isSimple() && !VT.isVector()) {
2501     MVT Simple = VT.getSimpleVT();
2502     unsigned SimpleSize = Simple.getSizeInBits();
2503     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2504     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2505       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2506       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2507       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2508       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2509             DAG.getConstant(SimpleSize, DL,
2510                             getShiftAmountTy(N1.getValueType())));
2511       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2512     }
2513   }
2514 
2515   return SDValue();
2516 }
2517 
2518 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2519   SDValue N0 = N->getOperand(0);
2520   SDValue N1 = N->getOperand(1);
2521   EVT VT = N->getValueType(0);
2522   SDLoc DL(N);
2523 
2524   // fold (mulhu x, 0) -> 0
2525   if (isNullConstant(N1))
2526     return N1;
2527   // fold (mulhu x, 1) -> 0
2528   if (isOneConstant(N1))
2529     return DAG.getConstant(0, DL, N0.getValueType());
2530   // fold (mulhu x, undef) -> 0
2531   if (N0.isUndef() || N1.isUndef())
2532     return DAG.getConstant(0, DL, VT);
2533 
2534   // If the type twice as wide is legal, transform the mulhu to a wider multiply
2535   // plus a shift.
2536   if (VT.isSimple() && !VT.isVector()) {
2537     MVT Simple = VT.getSimpleVT();
2538     unsigned SimpleSize = Simple.getSizeInBits();
2539     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2540     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2541       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2542       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2543       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2544       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2545             DAG.getConstant(SimpleSize, DL,
2546                             getShiftAmountTy(N1.getValueType())));
2547       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2548     }
2549   }
2550 
2551   return SDValue();
2552 }
2553 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // NOTE(review): this check uses isOperationLegalOrCustom while the
    // high-half check below uses the stricter isOperationLegal -- confirm the
    // asymmetry is intentional.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  // Build a speculative single-result node and see whether combine() can fold
  // it into something new; only commit when it simplified to a different
  // (and, post-legalization, legal) node.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
2604 
2605 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2606   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
2607     return Res;
2608 
2609   EVT VT = N->getValueType(0);
2610   SDLoc DL(N);
2611 
2612   // If the type is twice as wide is legal, transform the mulhu to a wider
2613   // multiply plus a shift.
2614   if (VT.isSimple() && !VT.isVector()) {
2615     MVT Simple = VT.getSimpleVT();
2616     unsigned SimpleSize = Simple.getSizeInBits();
2617     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2618     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2619       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2620       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2621       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2622       // Compute the high part as N1.
2623       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2624             DAG.getConstant(SimpleSize, DL,
2625                             getShiftAmountTy(Lo.getValueType())));
2626       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2627       // Compute the low part as N0.
2628       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2629       return CombineTo(N, Lo, Hi);
2630     }
2631   }
2632 
2633   return SDValue();
2634 }
2635 
2636 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2637   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
2638     return Res;
2639 
2640   EVT VT = N->getValueType(0);
2641   SDLoc DL(N);
2642 
2643   // If the type is twice as wide is legal, transform the mulhu to a wider
2644   // multiply plus a shift.
2645   if (VT.isSimple() && !VT.isVector()) {
2646     MVT Simple = VT.getSimpleVT();
2647     unsigned SimpleSize = Simple.getSizeInBits();
2648     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2649     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2650       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2651       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2652       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2653       // Compute the high part as N1.
2654       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2655             DAG.getConstant(SimpleSize, DL,
2656                             getShiftAmountTy(Lo.getValueType())));
2657       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2658       // Compute the low part as N0.
2659       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2660       return CombineTo(N, Lo, Hi);
2661     }
2662   }
2663 
2664   return SDValue();
2665 }
2666 
2667 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2668   // (smulo x, 2) -> (saddo x, x)
2669   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2670     if (C2->getAPIntValue() == 2)
2671       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2672                          N->getOperand(0), N->getOperand(0));
2673 
2674   return SDValue();
2675 }
2676 
2677 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2678   // (umulo x, 2) -> (uaddo x, x)
2679   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2680     if (C2->getAPIntValue() == 2)
2681       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2682                          N->getOperand(0), N->getOperand(0));
2683 
2684   return SDValue();
2685 }
2686 
2687 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
2688   SDValue N0 = N->getOperand(0);
2689   SDValue N1 = N->getOperand(1);
2690   EVT VT = N0.getValueType();
2691 
2692   // fold vector ops
2693   if (VT.isVector())
2694     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2695       return FoldedVOp;
2696 
2697   // fold (add c1, c2) -> c1+c2
2698   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
2699   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2700   if (N0C && N1C)
2701     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
2702 
2703   // canonicalize constant to RHS
2704   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2705      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2706     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
2707 
2708   return SDValue();
2709 }
2710 
/// If this is a binary operator (AND/OR/XOR) with two operands of the same
/// opcode, try to simplify it by hoisting the logical op through the common
/// operation (extend, bswap, truncate, shift, bitcast, or shuffle).
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    // N1 has the same opcode as N0 (asserted above), so its operand 0 is the
    // pre-bitcast/pre-insert value as well.
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    // The same-opcode assert above guarantees N1 is also a VECTOR_SHUFFLE.
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      // ShOp is the shared second shuffle input (C in the patterns below).
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // For XOR the shared input must become zero in the result (x^x == 0).
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // Same check as above, but now the shared input is operand 0.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
2863 
2864 /// This contains all DAGCombine rules which reduce two values combined by
2865 /// an And operation to a single value. This makes them reusable in the context
2866 /// of visitSELECT(). Rules involving constants are not included as
2867 /// visitSELECT() already handles those cases.
2868 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
2869                                   SDNode *LocReference) {
2870   EVT VT = N1.getValueType();
2871 
2872   // fold (and x, undef) -> 0
2873   if (N0.isUndef() || N1.isUndef())
2874     return DAG.getConstant(0, SDLoc(LocReference), VT);
2875   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
2876   SDValue LL, LR, RL, RR, CC0, CC1;
2877   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2878     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2879     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2880 
2881     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2882         LL.getValueType().isInteger()) {
2883       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
2884       if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
2885         EVT CCVT = getSetCCResultType(LR.getValueType());
2886         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2887           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2888                                        LR.getValueType(), LL, RL);
2889           AddToWorklist(ORNode.getNode());
2890           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2891         }
2892       }
2893       if (isAllOnesConstant(LR)) {
2894         // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
2895         if (Op1 == ISD::SETEQ) {
2896           EVT CCVT = getSetCCResultType(LR.getValueType());
2897           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2898             SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
2899                                           LR.getValueType(), LL, RL);
2900             AddToWorklist(ANDNode.getNode());
2901             return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
2902           }
2903         }
2904         // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
2905         if (Op1 == ISD::SETGT) {
2906           EVT CCVT = getSetCCResultType(LR.getValueType());
2907           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2908             SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2909                                          LR.getValueType(), LL, RL);
2910             AddToWorklist(ORNode.getNode());
2911             return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2912           }
2913         }
2914       }
2915     }
2916     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
2917     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
2918         Op0 == Op1 && LL.getValueType().isInteger() &&
2919       Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
2920                             (isAllOnesConstant(LR) && isNullConstant(RR)))) {
2921       EVT CCVT = getSetCCResultType(LL.getValueType());
2922       if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2923         SDLoc DL(N0);
2924         SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
2925                                       LL, DAG.getConstant(1, DL,
2926                                                           LL.getValueType()));
2927         AddToWorklist(ADDNode.getNode());
2928         return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
2929                             DAG.getConstant(2, DL, LL.getValueType()),
2930                             ISD::SETUGE);
2931       }
2932     }
2933     // canonicalize equivalent to ll == rl
2934     if (LL == RR && LR == RL) {
2935       Op1 = ISD::getSetCCSwappedOperands(Op1);
2936       std::swap(RL, RR);
2937     }
2938     if (LL == RL && LR == RR) {
2939       bool isInteger = LL.getValueType().isInteger();
2940       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
2941       if (Result != ISD::SETCC_INVALID &&
2942           (!LegalOperations ||
2943            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
2944             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
2945         EVT CCVT = getSetCCResultType(LL.getValueType());
2946         if (N0.getValueType() == CCVT ||
2947             (!LegalOperations && N0.getValueType() == MVT::i1))
2948           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
2949                               LL, LR, Result);
2950       }
2951     }
2952   }
2953 
2954   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
2955       VT.getSizeInBits() <= 64) {
2956     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2957       APInt ADDC = ADDI->getAPIntValue();
2958       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
2959         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
2960         // immediate for an add, but it is legal if its top c2 bits are set,
2961         // transform the ADD so the immediate doesn't need to be materialized
2962         // in a register.
2963         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
2964           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
2965                                              SRLI->getZExtValue());
2966           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
2967             ADDC |= Mask;
2968             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
2969               SDLoc DL(N0);
2970               SDValue NewAdd =
2971                 DAG.getNode(ISD::ADD, DL, VT,
2972                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
2973               CombineTo(N0.getNode(), NewAdd);
2974               // Return N so it doesn't get rechecked!
2975               return SDValue(LocReference, 0);
2976             }
2977           }
2978         }
2979       }
2980     }
2981   }
2982 
2983   // Reduce bit extract of low half of an integer to the narrower type.
2984   // (and (srl i64:x, K), KMask) ->
2985   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
2986   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
2987     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
2988       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2989         unsigned Size = VT.getSizeInBits();
2990         const APInt &AndMask = CAnd->getAPIntValue();
2991         unsigned ShiftBits = CShift->getZExtValue();
2992         unsigned MaskBits = AndMask.countTrailingOnes();
2993         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
2994 
2995         if (APIntOps::isMask(AndMask) &&
2996             // Required bits must not span the two halves of the integer and
2997             // must fit in the half size type.
2998             (ShiftBits + MaskBits <= Size / 2) &&
2999             TLI.isNarrowingProfitable(VT, HalfVT) &&
3000             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3001             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3002             TLI.isTruncateFree(VT, HalfVT) &&
3003             TLI.isZExtFree(HalfVT, VT)) {
3004           // The isNarrowingProfitable is to avoid regressions on PPC and
3005           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3006           // on downstream users of this. Those patterns could probably be
3007           // extended to handle extensions mixed in.
3008 
3009           SDValue SL(N0);
3010           assert(ShiftBits != 0 && MaskBits <= Size);
3011 
3012           // Extracting the highest bit of the low half.
3013           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3014           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3015                                       N0.getOperand(0));
3016 
3017           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3018           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3019           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3020           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3021           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3022         }
3023       }
3024     }
3025   }
3026 
3027   return SDValue();
3028 }
3029 
3030 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3031                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3032                                    bool &NarrowLoad) {
3033   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3034 
3035   if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
3036     return false;
3037 
3038   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3039   LoadedVT = LoadN->getMemoryVT();
3040 
3041   if (ExtVT == LoadedVT &&
3042       (!LegalOperations ||
3043        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3044     // ZEXTLOAD will match without needing to change the size of the value being
3045     // loaded.
3046     NarrowLoad = false;
3047     return true;
3048   }
3049 
3050   // Do not change the width of a volatile load.
3051   if (LoadN->isVolatile())
3052     return false;
3053 
3054   // Do not generate loads of non-round integer types since these can
3055   // be expensive (and would be wrong if the type is not byte sized).
3056   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3057     return false;
3058 
3059   if (LegalOperations &&
3060       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3061     return false;
3062 
3063   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3064     return false;
3065 
3066   NarrowLoad = true;
3067   return true;
3068 }
3069 
// Combine an ISD::AND node. Tries, in order: vector all-zeros/all-ones
// operands, constant folding, canonicalization of constants to the RHS,
// known-bits simplifications, reassociation, folds through extends, and
// several patterns that convert a masked (possibly extending) load into a
// zero-extending load. Returns the replacement value, SDValue(N, 0) when N
// was rewritten in place via CombineTo, or an empty SDValue if no fold fired.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getNullValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getNullValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // Mask covers the bits the AND would clear; if V already has them zero,
    // the any_extend behaves like a zero_extend and the AND is redundant.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // Default is a 1-bit zero, i.e. NOT all-ones, so the fold below only
    // fires when a usable mask constant is actually found.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarType().getSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant =
      Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());

    // B records whether the load's extension kind permits dropping the AND:
    // ZEXTLOAD/NON_EXTLOAD always do; EXTLOAD only if a ZEXTLOAD is legal.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);
      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      // Fold the AND away, taking care not to fold to the old load node if we
      // replaced it.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    // Only fire when the load's sole user is this chain (so it can be
    // replaced) and it isn't sign-extending or indexed.
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
      auto NarrowLoad = false;
      EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
      EVT ExtVT, LoadedVT;
      if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
                           NarrowLoad)) {
        if (!NarrowLoad) {
          // Same width: just flip the extension kind to ZEXTLOAD.
          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
                           LN0->getMemOperand());
          AddToWorklist(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        } else {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes.  For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
          if (DAG.getDataLayout().isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            SDLoc DL(LN0);
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorklist(NewPtr.getNode());

          SDValue Load = DAG.getExtLoad(
              ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
              LN0->getPointerInfo(), ExtVT, Alignment,
              LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
          AddToWorklist(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  // Folds shared with visitANDLike (setcc combining, add-immediate rewrite,
  // narrowing of bit extracts).
  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  return SDValue();
}
3372 
3373 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // N is an OR of N0 and N1. Recognize the byte-swap-of-low-halfword pattern
  // (possibly with redundant masking ANDs on either side) and rewrite it as
  // (srl (bswap a), OpSizeInBits - 16). When DemandHighBits is false (the
  // caller masks the result with 0xffff), the high-bit checks are skipped.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer types where BSWAP is directly legal.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  // LookPassAnd0/1 remember that we peeled a masking AND off N0/N1, so the
  // later high-bit safety checks know the bits were already cleared.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so that any AND-of-SRL ends up as N1 and AND-of-SHL as N0.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After peeling the ANDs, we need exactly (shl ..., 8) on one side and
  // (srl ..., 8) on the other; canonicalize the SHL to N0.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves of the pattern must come from the same source value 'a'.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a); for types wider than 16 bits the swapped halfword lands
  // in the top, so shift it back down into the low 16 bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
3480 
3481 /// Return true if the specified node is an element that makes up a 32-bit
3482 /// packed halfword byteswap.
3483 /// ((x & 0x000000ff) << 8) |
3484 /// ((x & 0x0000ff00) >> 8) |
3485 /// ((x & 0x00ff0000) << 8) |
3486 /// ((x & 0xff000000) >> 8)
3487 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3488   if (!N.getNode()->hasOneUse())
3489     return false;
3490 
3491   unsigned Opc = N.getOpcode();
3492   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3493     return false;
3494 
3495   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3496   if (!N1C)
3497     return false;
3498 
3499   unsigned Num;
3500   switch (N1C->getZExtValue()) {
3501   default:
3502     return false;
3503   case 0xFF:       Num = 0; break;
3504   case 0xFF00:     Num = 1; break;
3505   case 0xFF0000:   Num = 2; break;
3506   case 0xFF000000: Num = 3; break;
3507   }
3508 
3509   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3510   SDValue N0 = N.getOperand(0);
3511   if (Opc == ISD::AND) {
3512     if (Num == 0 || Num == 2) {
3513       // (x >> 8) & 0xff
3514       // (x >> 8) & 0xff0000
3515       if (N0.getOpcode() != ISD::SRL)
3516         return false;
3517       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3518       if (!C || C->getZExtValue() != 8)
3519         return false;
3520     } else {
3521       // (x << 8) & 0xff00
3522       // (x << 8) & 0xff000000
3523       if (N0.getOpcode() != ISD::SHL)
3524         return false;
3525       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3526       if (!C || C->getZExtValue() != 8)
3527         return false;
3528     }
3529   } else if (Opc == ISD::SHL) {
3530     // (x & 0xff) << 8
3531     // (x & 0xff0000) << 8
3532     if (Num != 0 && Num != 2)
3533       return false;
3534     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3535     if (!C || C->getZExtValue() != 8)
3536       return false;
3537   } else { // Opc == ISD::SRL
3538     // (x & 0xff00) >> 8
3539     // (x & 0xff000000) >> 8
3540     if (Num != 1 && Num != 3)
3541       return false;
3542     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3543     if (!C || C->getZExtValue() != 8)
3544       return false;
3545   }
3546 
3547   if (Parts[Num])
3548     return false;
3549 
3550   Parts[Num] = N0.getOperand(0).getNode();
3551   return true;
3552 }
3553 
3554 /// Match a 32-bit packed halfword bswap. That is
3555 /// ((x & 0x000000ff) << 8) |
3556 /// ((x & 0x0000ff00) >> 8) |
3557 /// ((x & 0x00ff0000) << 8) |
3558 /// ((x & 0xff000000) >> 8)
3559 /// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // N is an OR of N0 and N1; together they must form the four masked-byte
  // elements documented above. Each element is validated by
  // isBSwapHWordElement, which records the shared source node in Parts.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] collects the source node for byte i; all four must end up equal.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
3630 
3631 /// This contains all DAGCombine rules which reduce two values combined by
3632 /// an Or operation to a single value \see visitANDLike().
3633 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
3634   EVT VT = N1.getValueType();
3635   // fold (or x, undef) -> -1
3636   if (!LegalOperations &&
3637       (N0.isUndef() || N1.isUndef())) {
3638     EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
3639     return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
3640                            SDLoc(LocReference), VT);
3641   }
3642   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3643   SDValue LL, LR, RL, RR, CC0, CC1;
3644   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3645     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3646     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3647 
3648     if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
3649       // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3650       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
3651       if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3652         EVT CCVT = getSetCCResultType(LR.getValueType());
3653         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3654           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3655                                        LR.getValueType(), LL, RL);
3656           AddToWorklist(ORNode.getNode());
3657           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3658         }
3659       }
3660       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3661       // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
3662       if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3663         EVT CCVT = getSetCCResultType(LR.getValueType());
3664         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3665           SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3666                                         LR.getValueType(), LL, RL);
3667           AddToWorklist(ANDNode.getNode());
3668           return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
3669         }
3670       }
3671     }
3672     // canonicalize equivalent to ll == rl
3673     if (LL == RR && LR == RL) {
3674       Op1 = ISD::getSetCCSwappedOperands(Op1);
3675       std::swap(RL, RR);
3676     }
3677     if (LL == RL && LR == RR) {
3678       bool isInteger = LL.getValueType().isInteger();
3679       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
3680       if (Result != ISD::SETCC_INVALID &&
3681           (!LegalOperations ||
3682            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3683             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
3684         EVT CCVT = getSetCCResultType(LL.getValueType());
3685         if (N0.getValueType() == CCVT ||
3686             (!LegalOperations && N0.getValueType() == MVT::i1))
3687           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3688                               LL, LR, Result);
3689       }
3690     }
3691   }
3692 
3693   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
3694   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3695       // Don't increase # computations.
3696       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3697     // We can only do this xform if we know that bits from X that are set in C2
3698     // but not in C1 are already zero.  Likewise for Y.
3699     if (const ConstantSDNode *N0O1C =
3700         getAsNonOpaqueConstant(N0.getOperand(1))) {
3701       if (const ConstantSDNode *N1O1C =
3702           getAsNonOpaqueConstant(N1.getOperand(1))) {
3703         // We can only do this xform if we know that bits from X that are set in
3704         // C2 but not in C1 are already zero.  Likewise for Y.
3705         const APInt &LHSMask = N0O1C->getAPIntValue();
3706         const APInt &RHSMask = N1O1C->getAPIntValue();
3707 
3708         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
3709             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
3710           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3711                                   N0.getOperand(0), N1.getOperand(0));
3712           SDLoc DL(LocReference);
3713           return DAG.getNode(ISD::AND, DL, VT, X,
3714                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
3715         }
3716       }
3717     }
3718   }
3719 
3720   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
3721   if (N0.getOpcode() == ISD::AND &&
3722       N1.getOpcode() == ISD::AND &&
3723       N0.getOperand(0) == N1.getOperand(0) &&
3724       // Don't increase # computations.
3725       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3726     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3727                             N0.getOperand(1), N1.getOperand(1));
3728     return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
3729   }
3730 
3731   return SDValue();
3732 }
3733 
/// Try to simplify an OR node, both the scalar and vector forms.  Attempts
/// constant folding, algebraic identities, shuffle merging, bswap/rotate
/// matching and demanded-bits simplification, in that order.  Returns the
/// replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build a blend mask that, for each lane, selects whichever shuffle
        // supplies the non-zero element; any lane where that is ambiguous
        // defeats the fold.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          // The non-zero input of each original shuffle becomes an operand
          // of the merged shuffle.
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            // Try the commuted form before giving up.
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // Folds shared with other OR-like patterns (or-of-setcc, or-of-and)
  // live in visitORLike.
  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 (when the constants share no bits the rewrite
  // does not simplify anything, so we skip it).
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
             isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
      if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                   N1C, C1))
        return DAG.getNode(
            ISD::AND, SDLoc(N), VT,
            DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
      return SDValue();
    }
  }
  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
3893 
3894 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
3895 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3896   if (Op.getOpcode() == ISD::AND) {
3897     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
3898       Mask = Op.getOperand(1);
3899       Op = Op.getOperand(0);
3900     } else {
3901       return false;
3902     }
3903   }
3904 
3905   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
3906     Shift = Op;
3907     return true;
3908   }
3909 
3910   return false;
3911 }
3912 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  //
  // MaskLoBits is nonzero iff condition [A] is in effect; it then holds
  // log2(EltSize), the number of significant bits in Mask.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // Width is our candidate for the value that must equal EltSize modulo
  // Mask; the final comparison at the bottom checks exactly that.
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
4015 
4016 // A subroutine of MatchRotate used once we have found an OR of two opposite
4017 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4018 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4019 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4020 // Neg with outer conversions stripped away.
4021 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4022                                        SDValue Neg, SDValue InnerPos,
4023                                        SDValue InnerNeg, unsigned PosOpcode,
4024                                        unsigned NegOpcode, const SDLoc &DL) {
4025   // fold (or (shl x, (*ext y)),
4026   //          (srl x, (*ext (sub 32, y)))) ->
4027   //   (rotl x, y) or (rotr x, (sub 32, y))
4028   //
4029   // fold (or (shl x, (*ext (sub 32, y))),
4030   //          (srl x, (*ext y))) ->
4031   //   (rotr x, y) or (rotl x, (sub 32, y))
4032   EVT VT = Shifted.getValueType();
4033   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4034     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4035     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4036                        HasPos ? Pos : Neg).getNode();
4037   }
4038 
4039   return nullptr;
4040 }
4041 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the new rotate node, or null if no rotate was matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
    uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
    // The two constant amounts must cover the full element width for the OR
    // of the two shifts to be a rotate.
    if ((LShVal + RShVal) != EltSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    // Each mask only constrained the bits produced by its own half, so widen
    // it with the bit range the opposite (unmasked) half contributes before
    // applying it to the rotate.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
      SDValue Mask = DAG.getConstant(AllBits, DL, VT);

      if (LHSMask.getNode()) {
        // The srl half supplies the low LShVal bits of the rotate.
        APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask,
                                       DAG.getConstant(RHSBits, DL, VT)));
      }
      if (RHSMask.getNode()) {
        // The shl half supplies the high RShVal bits of the rotate.
        APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask,
                                       DAG.getConstant(LHSBits, DL, VT)));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  // Only look through when both amounts are wrapped in a conversion, so the
  // inner values can be compared by matchRotateSub.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both orientations: the LHS amount as the "positive" rotate amount,
  // then the RHS amount.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
4152 
/// Try to simplify an XOR node, both the scalar and vector forms.  Attempts
/// constant folding, setcc inversion, De Morgan rewrites and demanded-bits
/// simplification, in that order.  Returns the replacement value, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only invert when the inverted condition is legal (or legality is not
    // yet enforced).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      // XOR with the constant-one N1 is a logical NOT on i1 values.
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  // N1C is the non-opaque constant form of N1 computed above.
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
4315 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// Attempts (shift (binop X, C1), C2) -> (binop (shift X, C2), C1 shift C2),
/// pulling binops through shifts so address-style computations canonicalize.
/// Returns the rewritten value, or an empty SDValue if no transform applies.
/// NOTE(review): the Amt parameter is not referenced in this body; the shift
/// amount is re-read from N->getOperand(1) — confirm whether Amt can be
/// dropped at the call sites.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  // Refuse to duplicate work: the binop must have no other users.
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the reassociation.
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
4389 
4390 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
4391   assert(N->getOpcode() == ISD::TRUNCATE);
4392   assert(N->getOperand(0).getOpcode() == ISD::AND);
4393 
4394   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
4395   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
4396     SDValue N01 = N->getOperand(0).getOperand(1);
4397 
4398     if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
4399       if (!N01C->isOpaque()) {
4400         EVT TruncVT = N->getValueType(0);
4401         SDValue N00 = N->getOperand(0).getOperand(0);
4402         APInt TruncC = N01C->getAPIntValue();
4403         TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
4404         SDLoc DL(N);
4405 
4406         return DAG.getNode(ISD::AND, DL, TruncVT,
4407                            DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
4408                            DAG.getConstant(TruncC, DL, TruncVT));
4409       }
4410     }
4411   }
4412 
4413   return SDValue();
4414 }
4415 
4416 SDValue DAGCombiner::visitRotate(SDNode *N) {
4417   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
4418   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
4419       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
4420     if (SDValue NewOp1 =
4421             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
4422       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
4423                          N->getOperand(0), NewOp1);
4424   }
4425   return SDValue();
4426 }
4427 
/// Combine an ISD::SHL node. Tries each (shl ...) fold below in order and
/// returns the replacement value for the first one that applies, or an empty
/// SDValue if none do. NOTE: the folds are ordered from cheap identities to
/// more structural rewrites; the order is intentional.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // Only sound when booleans are 0/-1: shifting the constant mask then
        // keeps exactly the bits the all-ones setcc lanes can produce.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // A constant-splat vector shift amount can be treated like the
        // scalar-constant case for the folds below.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Widen both amounts with a spare bit so the addition cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        return DAG.getConstant(0, DL, VT);

      return DAG.getNode(
          ISD::SHL, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // c2 must cover at least the bits added by the extension (see above).
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT,
                           DAG.getNode(N0.getOpcode(), DL, VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Only handle matching amounts: (shl (zext (srl x, C)), C) simply
        // clears the low C bits, which survives being done pre-zext.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // "exact" guarantees no bits were shifted out, so the shifts compose.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits the original srl left defined; shifted below to
        // track the net shift that replaces the pair.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDLoc DL(N);
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()),
                      DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  APInt Val;
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isa<ConstantSDNode>(N0.getOperand(1)) ||
       ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (SDValue Folded =
              DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
        return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
    }
  }

  // Last resort: try the generic shift-over-binop commutation.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
4644 
/// Combine an ISD::SRA node. Tries each (sra ...) fold below in order and
/// returns the replacement value for the first one that applies, or an empty
/// SDValue if none do.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // A constant-splat vector shift amount behaves like the scalar case.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> c1>>c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (sra -1, x) -> -1
  if (isAllOnesConstant(N0))
    return N0;
  // fold (sra x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Widen both amounts with a spare bit so the addition cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      // An over-wide sra saturates: shifting by >= width is the same as
      // shifting by width-1 (all sign bits).
      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        Sum = APInt(OpSizeInBits, OpSizeInBits - 1);

      return DAG.getNode(
          ISD::SRA, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // A logical shift suffices here: the truncate discards the bits the
        // sign would have filled, and the final sign_extend restores them.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      // Requires that the inner shift exactly discards the truncated bits,
      // so the combined sra reads the same window of x.
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Last resort: try the generic shift-over-binop commutation.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
4796 
/// Combine an ISD::SRL node. Tries each (srl ...) fold below in order and
/// returns the replacement value for the first one that applies, or an empty
/// SDValue if none do.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // A constant-splat vector shift amount behaves like the scalar case.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      SDLoc DL(N);
      APInt c1 = N0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      // Widen both amounts with a spare bit so the addition cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      APInt Sum = c1 + c2;
      if (Sum.uge(OpSizeInBits))
        return DAG.getConstant(0, DL, VT);

      return DAG.getNode(
          ISD::SRL, DL, VT, N0.getOperand(0),
          DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      SDLoc DL(N0);
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::TRUNCATE, DL, VT,
                         DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, DL,
                                                     ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    // The 64-bit mask computation below only works for widths <= 64.
    if (BitSize <= 64) {
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, DL, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // The mask clears the high bits that the any_extend left undefined;
      // the original srl would have shifted in zeros there.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
5002 
5003 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5004   SDValue N0 = N->getOperand(0);
5005   EVT VT = N->getValueType(0);
5006 
5007   // fold (bswap c1) -> c2
5008   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5009     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5010   // fold (bswap (bswap x)) -> x
5011   if (N0.getOpcode() == ISD::BSWAP)
5012     return N0->getOperand(0);
5013   return SDValue();
5014 }
5015 
5016 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5017   SDValue N0 = N->getOperand(0);
5018 
5019   // fold (bitreverse (bitreverse x)) -> x
5020   if (N0.getOpcode() == ISD::BITREVERSE)
5021     return N0.getOperand(0);
5022   return SDValue();
5023 }
5024 
5025 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5026   SDValue N0 = N->getOperand(0);
5027   EVT VT = N->getValueType(0);
5028 
5029   // fold (ctlz c1) -> c2
5030   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5031     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5032   return SDValue();
5033 }
5034 
5035 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5036   SDValue N0 = N->getOperand(0);
5037   EVT VT = N->getValueType(0);
5038 
5039   // fold (ctlz_zero_undef c1) -> c2
5040   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5041     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5042   return SDValue();
5043 }
5044 
5045 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5046   SDValue N0 = N->getOperand(0);
5047   EVT VT = N->getValueType(0);
5048 
5049   // fold (cttz c1) -> c2
5050   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5051     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5052   return SDValue();
5053 }
5054 
5055 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5056   SDValue N0 = N->getOperand(0);
5057   EVT VT = N->getValueType(0);
5058 
5059   // fold (cttz_zero_undef c1) -> c2
5060   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5061     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5062   return SDValue();
5063 }
5064 
5065 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5066   SDValue N0 = N->getOperand(0);
5067   EVT VT = N->getValueType(0);
5068 
5069   // fold (ctpop c1) -> c2
5070   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5071     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5072   return SDValue();
5073 }
5074 
5075 
5076 /// \brief Generate Min/Max node
5077 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5078                                    SDValue RHS, SDValue True, SDValue False,
5079                                    ISD::CondCode CC, const TargetLowering &TLI,
5080                                    SelectionDAG &DAG) {
5081   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5082     return SDValue();
5083 
5084   switch (CC) {
5085   case ISD::SETOLT:
5086   case ISD::SETOLE:
5087   case ISD::SETLT:
5088   case ISD::SETLE:
5089   case ISD::SETULT:
5090   case ISD::SETULE: {
5091     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
5092     if (TLI.isOperationLegal(Opcode, VT))
5093       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5094     return SDValue();
5095   }
5096   case ISD::SETOGT:
5097   case ISD::SETOGE:
5098   case ISD::SETGT:
5099   case ISD::SETGE:
5100   case ISD::SETUGT:
5101   case ISD::SETUGE: {
5102     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5103     if (TLI.isOperationLegal(Opcode, VT))
5104       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5105     return SDValue();
5106   }
5107   default:
5108     return SDValue();
5109   }
5110 }
5111 
5112 SDValue DAGCombiner::visitSELECT(SDNode *N) {
5113   SDValue N0 = N->getOperand(0);
5114   SDValue N1 = N->getOperand(1);
5115   SDValue N2 = N->getOperand(2);
5116   EVT VT = N->getValueType(0);
5117   EVT VT0 = N0.getValueType();
5118 
5119   // fold (select C, X, X) -> X
5120   if (N1 == N2)
5121     return N1;
5122   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
5123     // fold (select true, X, Y) -> X
5124     // fold (select false, X, Y) -> Y
5125     return !N0C->isNullValue() ? N1 : N2;
5126   }
5127   // fold (select C, 1, X) -> (or C, X)
5128   if (VT == MVT::i1 && isOneConstant(N1))
5129     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5130   // fold (select C, 0, 1) -> (xor C, 1)
5131   // We can't do this reliably if integer based booleans have different contents
5132   // to floating point based booleans. This is because we can't tell whether we
5133   // have an integer-based boolean or a floating-point-based boolean unless we
5134   // can find the SETCC that produced it and inspect its operands. This is
5135   // fairly easy if C is the SETCC node, but it can potentially be
5136   // undiscoverable (or not reasonably discoverable). For example, it could be
5137   // in another basic block or it could require searching a complicated
5138   // expression.
5139   if (VT.isInteger() &&
5140       (VT0 == MVT::i1 || (VT0.isInteger() &&
5141                           TLI.getBooleanContents(false, false) ==
5142                               TLI.getBooleanContents(false, true) &&
5143                           TLI.getBooleanContents(false, false) ==
5144                               TargetLowering::ZeroOrOneBooleanContent)) &&
5145       isNullConstant(N1) && isOneConstant(N2)) {
5146     SDValue XORNode;
5147     if (VT == VT0) {
5148       SDLoc DL(N);
5149       return DAG.getNode(ISD::XOR, DL, VT0,
5150                          N0, DAG.getConstant(1, DL, VT0));
5151     }
5152     SDLoc DL0(N0);
5153     XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
5154                           N0, DAG.getConstant(1, DL0, VT0));
5155     AddToWorklist(XORNode.getNode());
5156     if (VT.bitsGT(VT0))
5157       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
5158     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
5159   }
5160   // fold (select C, 0, X) -> (and (not C), X)
5161   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
5162     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5163     AddToWorklist(NOTNode.getNode());
5164     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
5165   }
5166   // fold (select C, X, 1) -> (or (not C), X)
5167   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
5168     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5169     AddToWorklist(NOTNode.getNode());
5170     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
5171   }
5172   // fold (select C, X, 0) -> (and C, X)
5173   if (VT == MVT::i1 && isNullConstant(N2))
5174     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
5175   // fold (select X, X, Y) -> (or X, Y)
5176   // fold (select X, 1, Y) -> (or X, Y)
5177   if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
5178     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5179   // fold (select X, Y, X) -> (and X, Y)
5180   // fold (select X, Y, 0) -> (and X, Y)
5181   if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
5182     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
5183 
5184   // If we can fold this based on the true/false value, do so.
5185   if (SimplifySelectOps(N, N1, N2))
5186     return SDValue(N, 0);  // Don't revisit N.
5187 
5188   if (VT0 == MVT::i1) {
5189     // The code in this block deals with the following 2 equivalences:
5190     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
5191     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
5192     // The target can specify its prefered form with the
5193     // shouldNormalizeToSelectSequence() callback. However we always transform
5194     // to the right anyway if we find the inner select exists in the DAG anyway
5195     // and we always transform to the left side if we know that we can further
5196     // optimize the combination of the conditions.
5197     bool normalizeToSequence
5198       = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
5199     // select (and Cond0, Cond1), X, Y
5200     //   -> select Cond0, (select Cond1, X, Y), Y
5201     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
5202       SDValue Cond0 = N0->getOperand(0);
5203       SDValue Cond1 = N0->getOperand(1);
5204       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5205                                         N1.getValueType(), Cond1, N1, N2);
5206       if (normalizeToSequence || !InnerSelect.use_empty())
5207         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
5208                            InnerSelect, N2);
5209     }
5210     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
5211     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
5212       SDValue Cond0 = N0->getOperand(0);
5213       SDValue Cond1 = N0->getOperand(1);
5214       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5215                                         N1.getValueType(), Cond1, N1, N2);
5216       if (normalizeToSequence || !InnerSelect.use_empty())
5217         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
5218                            InnerSelect);
5219     }
5220 
5221     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
5222     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
5223       SDValue N1_0 = N1->getOperand(0);
5224       SDValue N1_1 = N1->getOperand(1);
5225       SDValue N1_2 = N1->getOperand(2);
5226       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
5227         // Create the actual and node if we can generate good code for it.
5228         if (!normalizeToSequence) {
5229           SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
5230                                     N0, N1_0);
5231           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
5232                              N1_1, N2);
5233         }
5234         // Otherwise see if we can optimize the "and" to a better pattern.
5235         if (SDValue Combined = visitANDLike(N0, N1_0, N))
5236           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5237                              N1_1, N2);
5238       }
5239     }
5240     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
5241     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
5242       SDValue N2_0 = N2->getOperand(0);
5243       SDValue N2_1 = N2->getOperand(1);
5244       SDValue N2_2 = N2->getOperand(2);
5245       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
5246         // Create the actual or node if we can generate good code for it.
5247         if (!normalizeToSequence) {
5248           SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
5249                                    N0, N2_0);
5250           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
5251                              N1, N2_2);
5252         }
5253         // Otherwise see if we can optimize to a better pattern.
5254         if (SDValue Combined = visitORLike(N0, N2_0, N))
5255           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5256                              N1, N2_2);
5257       }
5258     }
5259   }
5260 
5261   // fold selects based on a setcc into other things, such as min/max/abs
5262   if (N0.getOpcode() == ISD::SETCC) {
5263     // select x, y (fcmp lt x, y) -> fminnum x, y
5264     // select x, y (fcmp gt x, y) -> fmaxnum x, y
5265     //
5266     // This is OK if we don't care about what happens if either operand is a
5267     // NaN.
5268     //
5269 
5270     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
5271     // no signed zeros as well as no nans.
5272     const TargetOptions &Options = DAG.getTarget().Options;
5273     if (Options.UnsafeFPMath &&
5274         VT.isFloatingPoint() && N0.hasOneUse() &&
5275         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
5276       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5277 
5278       if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
5279                                                 N0.getOperand(1), N1, N2, CC,
5280                                                 TLI, DAG))
5281         return FMinMax;
5282     }
5283 
5284     if ((!LegalOperations &&
5285          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
5286         TLI.isOperationLegal(ISD::SELECT_CC, VT))
5287       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
5288                          N0.getOperand(0), N0.getOperand(1),
5289                          N1, N2, N0.getOperand(2));
5290     return SimplifySelect(SDLoc(N), N0, N1, N2);
5291   }
5292 
5293   return SDValue();
5294 }
5295 
5296 static
5297 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
5298   SDLoc DL(N);
5299   EVT LoVT, HiVT;
5300   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
5301 
5302   // Split the inputs.
5303   SDValue Lo, Hi, LL, LH, RL, RH;
5304   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
5305   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
5306 
5307   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
5308   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
5309 
5310   return std::make_pair(Lo, Hi);
5311 }
5312 
5313 // This function assumes all the vselect's arguments are CONCAT_VECTOR
5314 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
5315 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
5316   SDLoc dl(N);
5317   SDValue Cond = N->getOperand(0);
5318   SDValue LHS = N->getOperand(1);
5319   SDValue RHS = N->getOperand(2);
5320   EVT VT = N->getValueType(0);
5321   int NumElems = VT.getVectorNumElements();
5322   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
5323          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
5324          Cond.getOpcode() == ISD::BUILD_VECTOR);
5325 
5326   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
5327   // binary ones here.
5328   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
5329     return SDValue();
5330 
5331   // We're sure we have an even number of elements due to the
5332   // concat_vectors we have as arguments to vselect.
5333   // Skip BV elements until we find one that's not an UNDEF
5334   // After we find an UNDEF element, keep looping until we get to half the
5335   // length of the BV and see if all the non-undef nodes are the same.
5336   ConstantSDNode *BottomHalf = nullptr;
5337   for (int i = 0; i < NumElems / 2; ++i) {
5338     if (Cond->getOperand(i)->isUndef())
5339       continue;
5340 
5341     if (BottomHalf == nullptr)
5342       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5343     else if (Cond->getOperand(i).getNode() != BottomHalf)
5344       return SDValue();
5345   }
5346 
5347   // Do the same for the second half of the BuildVector
5348   ConstantSDNode *TopHalf = nullptr;
5349   for (int i = NumElems / 2; i < NumElems; ++i) {
5350     if (Cond->getOperand(i)->isUndef())
5351       continue;
5352 
5353     if (TopHalf == nullptr)
5354       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5355     else if (Cond->getOperand(i).getNode() != TopHalf)
5356       return SDValue();
5357   }
5358 
5359   assert(TopHalf && BottomHalf &&
5360          "One half of the selector was all UNDEFs and the other was all the "
5361          "same value. This should have been addressed before this function.");
5362   return DAG.getNode(
5363       ISD::CONCAT_VECTORS, dl, VT,
5364       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
5365       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
5366 }
5367 
5368 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
5369 
5370   if (Level >= AfterLegalizeTypes)
5371     return SDValue();
5372 
5373   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
5374   SDValue Mask = MSC->getMask();
5375   SDValue Data  = MSC->getValue();
5376   SDLoc DL(N);
5377 
5378   // If the MSCATTER data type requires splitting and the mask is provided by a
5379   // SETCC, then split both nodes and its operands before legalization. This
5380   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5381   // and enables future optimizations (e.g. min/max pattern matching on X86).
5382   if (Mask.getOpcode() != ISD::SETCC)
5383     return SDValue();
5384 
5385   // Check if any splitting is required.
5386   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5387       TargetLowering::TypeSplitVector)
5388     return SDValue();
5389   SDValue MaskLo, MaskHi, Lo, Hi;
5390   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5391 
5392   EVT LoVT, HiVT;
5393   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
5394 
5395   SDValue Chain = MSC->getChain();
5396 
5397   EVT MemoryVT = MSC->getMemoryVT();
5398   unsigned Alignment = MSC->getOriginalAlignment();
5399 
5400   EVT LoMemVT, HiMemVT;
5401   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5402 
5403   SDValue DataLo, DataHi;
5404   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5405 
5406   SDValue BasePtr = MSC->getBasePtr();
5407   SDValue IndexLo, IndexHi;
5408   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
5409 
5410   MachineMemOperand *MMO = DAG.getMachineFunction().
5411     getMachineMemOperand(MSC->getPointerInfo(),
5412                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5413                           Alignment, MSC->getAAInfo(), MSC->getRanges());
5414 
5415   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
5416   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
5417                             DL, OpsLo, MMO);
5418 
5419   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
5420   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
5421                             DL, OpsHi, MMO);
5422 
5423   AddToWorklist(Lo.getNode());
5424   AddToWorklist(Hi.getNode());
5425 
5426   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5427 }
5428 
5429 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
5430 
5431   if (Level >= AfterLegalizeTypes)
5432     return SDValue();
5433 
5434   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
5435   SDValue Mask = MST->getMask();
5436   SDValue Data  = MST->getValue();
5437   SDLoc DL(N);
5438 
5439   // If the MSTORE data type requires splitting and the mask is provided by a
5440   // SETCC, then split both nodes and its operands before legalization. This
5441   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5442   // and enables future optimizations (e.g. min/max pattern matching on X86).
5443   if (Mask.getOpcode() == ISD::SETCC) {
5444 
5445     // Check if any splitting is required.
5446     if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5447         TargetLowering::TypeSplitVector)
5448       return SDValue();
5449 
5450     SDValue MaskLo, MaskHi, Lo, Hi;
5451     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5452 
5453     EVT LoVT, HiVT;
5454     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
5455 
5456     SDValue Chain = MST->getChain();
5457     SDValue Ptr   = MST->getBasePtr();
5458 
5459     EVT MemoryVT = MST->getMemoryVT();
5460     unsigned Alignment = MST->getOriginalAlignment();
5461 
5462     // if Alignment is equal to the vector size,
5463     // take the half of it for the second part
5464     unsigned SecondHalfAlignment =
5465       (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
5466          Alignment/2 : Alignment;
5467 
5468     EVT LoMemVT, HiMemVT;
5469     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5470 
5471     SDValue DataLo, DataHi;
5472     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5473 
5474     MachineMemOperand *MMO = DAG.getMachineFunction().
5475       getMachineMemOperand(MST->getPointerInfo(),
5476                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
5477                            Alignment, MST->getAAInfo(), MST->getRanges());
5478 
5479     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
5480                             MST->isTruncatingStore());
5481 
5482     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5483     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5484                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5485 
5486     MMO = DAG.getMachineFunction().
5487       getMachineMemOperand(MST->getPointerInfo(),
5488                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
5489                            SecondHalfAlignment, MST->getAAInfo(),
5490                            MST->getRanges());
5491 
5492     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
5493                             MST->isTruncatingStore());
5494 
5495     AddToWorklist(Lo.getNode());
5496     AddToWorklist(Hi.getNode());
5497 
5498     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5499   }
5500   return SDValue();
5501 }
5502 
5503 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
5504 
5505   if (Level >= AfterLegalizeTypes)
5506     return SDValue();
5507 
5508   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
5509   SDValue Mask = MGT->getMask();
5510   SDLoc DL(N);
5511 
5512   // If the MGATHER result requires splitting and the mask is provided by a
5513   // SETCC, then split both nodes and its operands before legalization. This
5514   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5515   // and enables future optimizations (e.g. min/max pattern matching on X86).
5516 
5517   if (Mask.getOpcode() != ISD::SETCC)
5518     return SDValue();
5519 
5520   EVT VT = N->getValueType(0);
5521 
5522   // Check if any splitting is required.
5523   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5524       TargetLowering::TypeSplitVector)
5525     return SDValue();
5526 
5527   SDValue MaskLo, MaskHi, Lo, Hi;
5528   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5529 
5530   SDValue Src0 = MGT->getValue();
5531   SDValue Src0Lo, Src0Hi;
5532   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5533 
5534   EVT LoVT, HiVT;
5535   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5536 
5537   SDValue Chain = MGT->getChain();
5538   EVT MemoryVT = MGT->getMemoryVT();
5539   unsigned Alignment = MGT->getOriginalAlignment();
5540 
5541   EVT LoMemVT, HiMemVT;
5542   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5543 
5544   SDValue BasePtr = MGT->getBasePtr();
5545   SDValue Index = MGT->getIndex();
5546   SDValue IndexLo, IndexHi;
5547   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
5548 
5549   MachineMemOperand *MMO = DAG.getMachineFunction().
5550     getMachineMemOperand(MGT->getPointerInfo(),
5551                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5552                           Alignment, MGT->getAAInfo(), MGT->getRanges());
5553 
5554   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
5555   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
5556                             MMO);
5557 
5558   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
5559   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
5560                             MMO);
5561 
5562   AddToWorklist(Lo.getNode());
5563   AddToWorklist(Hi.getNode());
5564 
5565   // Build a factor node to remember that this load is independent of the
5566   // other one.
5567   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5568                       Hi.getValue(1));
5569 
5570   // Legalized the chain result - switch anything that used the old chain to
5571   // use the new one.
5572   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
5573 
5574   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5575 
5576   SDValue RetOps[] = { GatherRes, Chain };
5577   return DAG.getMergeValues(RetOps, DL);
5578 }
5579 
5580 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
5581 
5582   if (Level >= AfterLegalizeTypes)
5583     return SDValue();
5584 
5585   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
5586   SDValue Mask = MLD->getMask();
5587   SDLoc DL(N);
5588 
5589   // If the MLOAD result requires splitting and the mask is provided by a
5590   // SETCC, then split both nodes and its operands before legalization. This
5591   // prevents the type legalizer from unrolling SETCC into scalar comparisons
5592   // and enables future optimizations (e.g. min/max pattern matching on X86).
5593 
5594   if (Mask.getOpcode() == ISD::SETCC) {
5595     EVT VT = N->getValueType(0);
5596 
5597     // Check if any splitting is required.
5598     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5599         TargetLowering::TypeSplitVector)
5600       return SDValue();
5601 
5602     SDValue MaskLo, MaskHi, Lo, Hi;
5603     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5604 
5605     SDValue Src0 = MLD->getSrc0();
5606     SDValue Src0Lo, Src0Hi;
5607     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5608 
5609     EVT LoVT, HiVT;
5610     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
5611 
5612     SDValue Chain = MLD->getChain();
5613     SDValue Ptr   = MLD->getBasePtr();
5614     EVT MemoryVT = MLD->getMemoryVT();
5615     unsigned Alignment = MLD->getOriginalAlignment();
5616 
5617     // if Alignment is equal to the vector size,
5618     // take the half of it for the second part
5619     unsigned SecondHalfAlignment =
5620       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
5621          Alignment/2 : Alignment;
5622 
5623     EVT LoMemVT, HiMemVT;
5624     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5625 
5626     MachineMemOperand *MMO = DAG.getMachineFunction().
5627     getMachineMemOperand(MLD->getPointerInfo(),
5628                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
5629                          Alignment, MLD->getAAInfo(), MLD->getRanges());
5630 
5631     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
5632                            ISD::NON_EXTLOAD);
5633 
5634     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5635     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5636                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5637 
5638     MMO = DAG.getMachineFunction().
5639     getMachineMemOperand(MLD->getPointerInfo(),
5640                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
5641                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
5642 
5643     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
5644                            ISD::NON_EXTLOAD);
5645 
5646     AddToWorklist(Lo.getNode());
5647     AddToWorklist(Hi.getNode());
5648 
5649     // Build a factor node to remember that this load is independent of the
5650     // other one.
5651     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5652                         Hi.getValue(1));
5653 
5654     // Legalized the chain result - switch anything that used the old chain to
5655     // use the new one.
5656     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
5657 
5658     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5659 
5660     SDValue RetOps[] = { LoadRes, Chain };
5661     return DAG.getMergeValues(RetOps, DL);
5662   }
5663   return SDValue();
5664 }
5665 
/// Combine a VSELECT node: canonicalize vector integer abs, try operand
/// simplification, split illegal-typed selects fed by a SETCC, and fold
/// constant/concat conditions.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);  // condition vector
  SDValue N1 = N->getOperand(1);  // value selected where condition is true
  SDValue N2 = N->getOperand(2);  // value selected where condition is false
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // abs pattern with the negated value in the false arm:
    //   (vselect (setgt X, 0/-1), X, (sub 0, X))
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // Mirror image: negated value in the true arm.
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Branch-free abs: Y = X >> (bits-1); abs(X) = (X + Y) ^ Y.
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    // NOTE(review): when no split is needed, this returns SDValue() and the
    // constant-condition folds below are skipped for SETCC conditions —
    // presumably intentional, but worth confirming.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  return SDValue();
}
5753 
5754 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
5755   SDValue N0 = N->getOperand(0);
5756   SDValue N1 = N->getOperand(1);
5757   SDValue N2 = N->getOperand(2);
5758   SDValue N3 = N->getOperand(3);
5759   SDValue N4 = N->getOperand(4);
5760   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
5761 
5762   // fold select_cc lhs, rhs, x, x, cc -> x
5763   if (N2 == N3)
5764     return N2;
5765 
5766   // Determine if the condition we're dealing with is constant
5767   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
5768                                   CC, SDLoc(N), false)) {
5769     AddToWorklist(SCC.getNode());
5770 
5771     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
5772       if (!SCCC->isNullValue())
5773         return N2;    // cond always true -> true val
5774       else
5775         return N3;    // cond always false -> false val
5776     } else if (SCC->isUndef()) {
5777       // When the condition is UNDEF, just return the first operand. This is
5778       // coherent the DAG creation, no setcc node is created in this case
5779       return N2;
5780     } else if (SCC.getOpcode() == ISD::SETCC) {
5781       // Fold to a simpler select_cc
5782       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
5783                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
5784                          SCC.getOperand(2));
5785     }
5786   }
5787 
5788   // If we can fold this based on the true/false value, do so.
5789   if (SimplifySelectOps(N, N2, N3))
5790     return SDValue(N, 0);  // Don't revisit N.
5791 
5792   // fold select_cc into other things, such as min/max/abs
5793   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
5794 }
5795 
5796 SDValue DAGCombiner::visitSETCC(SDNode *N) {
5797   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
5798                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
5799                        SDLoc(N));
5800 }
5801 
5802 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
5803   SDValue LHS = N->getOperand(0);
5804   SDValue RHS = N->getOperand(1);
5805   SDValue Carry = N->getOperand(2);
5806   SDValue Cond = N->getOperand(3);
5807 
5808   // If Carry is false, fold to a regular SETCC.
5809   if (Carry.getOpcode() == ISD::CARRY_FALSE)
5810     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
5811 
5812   return SDValue();
5813 }
5814 
5815 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
5816 /// a build_vector of constants.
5817 /// This function is called by the DAGCombiner when visiting sext/zext/aext
5818 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
5819 /// Vector extends are not folded if operations are legal; this is to
5820 /// avoid introducing illegal build_vector dag nodes.
5821 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
5822                                          SelectionDAG &DAG, bool LegalTypes,
5823                                          bool LegalOperations) {
5824   unsigned Opcode = N->getOpcode();
5825   SDValue N0 = N->getOperand(0);
5826   EVT VT = N->getValueType(0);
5827 
5828   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
5829          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5830          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
5831          && "Expected EXTEND dag node in input!");
5832 
5833   // fold (sext c1) -> c1
5834   // fold (zext c1) -> c1
5835   // fold (aext c1) -> c1
5836   if (isa<ConstantSDNode>(N0))
5837     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
5838 
5839   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
5840   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
5841   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
5842   EVT SVT = VT.getScalarType();
5843   if (!(VT.isVector() &&
5844       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
5845       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
5846     return nullptr;
5847 
5848   // We can fold this node into a build_vector.
5849   unsigned VTBits = SVT.getSizeInBits();
5850   unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
5851   SmallVector<SDValue, 8> Elts;
5852   unsigned NumElts = VT.getVectorNumElements();
5853   SDLoc DL(N);
5854 
5855   for (unsigned i=0; i != NumElts; ++i) {
5856     SDValue Op = N0->getOperand(i);
5857     if (Op->isUndef()) {
5858       Elts.push_back(DAG.getUNDEF(SVT));
5859       continue;
5860     }
5861 
5862     SDLoc DL(Op);
5863     // Get the constant value and if needed trunc it to the size of the type.
5864     // Nodes like build_vector might have constants wider than the scalar type.
5865     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
5866     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
5867       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
5868     else
5869       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
5870   }
5871 
5872   return DAG.getBuildVector(VT, DL, Elts).getNode();
5873 }
5874 
5875 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
5876 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
5877 // transformation. Returns true if extension are possible and the above
5878 // mentioned transformation is profitable.
5879 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
5880                                     unsigned ExtOpc,
5881                                     SmallVectorImpl<SDNode *> &ExtendNodes,
5882                                     const TargetLowering &TLI) {
5883   bool HasCopyToRegUses = false;
5884   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
5885   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
5886                             UE = N0.getNode()->use_end();
5887        UI != UE; ++UI) {
5888     SDNode *User = *UI;
5889     if (User == N)
5890       continue;
5891     if (UI.getUse().getResNo() != N0.getResNo())
5892       continue;
5893     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
5894     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
5895       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
5896       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
5897         // Sign bits will be lost after a zext.
5898         return false;
5899       bool Add = false;
5900       for (unsigned i = 0; i != 2; ++i) {
5901         SDValue UseOp = User->getOperand(i);
5902         if (UseOp == N0)
5903           continue;
5904         if (!isa<ConstantSDNode>(UseOp))
5905           return false;
5906         Add = true;
5907       }
5908       if (Add)
5909         ExtendNodes.push_back(User);
5910       continue;
5911     }
5912     // If truncates aren't free and there are users we can't
5913     // extend, it isn't worthwhile.
5914     if (!isTruncFree)
5915       return false;
5916     // Remember if this value is live-out.
5917     if (User->getOpcode() == ISD::CopyToReg)
5918       HasCopyToRegUses = true;
5919   }
5920 
5921   if (HasCopyToRegUses) {
5922     bool BothLiveOut = false;
5923     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5924          UI != UE; ++UI) {
5925       SDUse &Use = UI.getUse();
5926       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
5927         BothLiveOut = true;
5928         break;
5929       }
5930     }
5931     if (BothLiveOut)
5932       // Both unextended and extended values are live out. There had better be
5933       // a good reason for the transformation.
5934       return ExtendNodes.size();
5935   }
5936   return true;
5937 }
5938 
5939 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
5940                                   SDValue Trunc, SDValue ExtLoad,
5941                                   const SDLoc &DL, ISD::NodeType ExtType) {
5942   // Extend SetCC uses if necessary.
5943   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
5944     SDNode *SetCC = SetCCs[i];
5945     SmallVector<SDValue, 4> Ops;
5946 
5947     for (unsigned j = 0; j != 2; ++j) {
5948       SDValue SOp = SetCC->getOperand(j);
5949       if (SOp == Trunc)
5950         Ops.push_back(ExtLoad);
5951       else
5952         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
5953     }
5954 
5955     Ops.push_back(SetCC->getOperand(2));
5956     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
5957   }
5958 }
5959 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
// Split an extending load of an illegal-but-splittable vector type into
// several smaller extloads of a legal (or custom) type, then reassemble the
// pieces with CONCAT_VECTORS. Returns SDValue(N, 0) on success so the caller
// does not re-visit N, or an empty SDValue if the combine does not apply.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Bail on extending/indexed/volatile loads, non-vector or non-power-of-two
  // destinations, and targets that say the transform is undesirable.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // All other uses of the load must themselves be extendable for the
  // transformation to be profitable (SETCC uses are recorded in SetCCs).
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Halve both the source and destination types until the extload is legal
  // (or custom) or the source cannot be split further.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Byte distance between consecutive split loads in memory.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extload per split, advancing the base pointer by Stride each
  // time and conservatively reducing the alignment for the offset pieces.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the chains of the split loads and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  // Any SETCC uses collected earlier must now compare the extended values.
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
6054 
// Combine step for ISD::SIGN_EXTEND. Tries a cascade of folds in order; each
// fold either returns a replacement value (or SDValue(N, 0) when the DAG was
// updated in place via CombineTo) or falls through to the next one.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant folding: sext of (a build_vector of) constants.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Resize Op to the destination width first, then sign-extend in
      // register from the truncated (mid) width.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, the transform is only worthwhile if the other
    // uses can be rewritten in terms of the extended value.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining uses of the old load see a truncate of the extload.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    // A zextload cannot be turned into a sextload here.
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the logic-op constant to the wider type.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                               N0.getOperand(0), N0.getOperand(1),
                               cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();

    SDLoc DL(N);
    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal =
        (SetCCWidth == 1)
            ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
                              DL, VT)
            : TLI.getConstTrueVal(DAG, VT, DL);

    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;

    // Scalar fallback: build the select_cc explicitly from a fresh setcc.
    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        SDValue SetCC =
            DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
                             DAG.getConstant(0, DL, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}
6291 
6292 // isTruncateOf - If N is a truncate of some other value, return true, record
6293 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
6294 // This function computes KnownZero to avoid a duplicated call to
6295 // computeKnownBits in the caller.
6296 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
6297                          APInt &KnownZero) {
6298   APInt KnownOne;
6299   if (N->getOpcode() == ISD::TRUNCATE) {
6300     Op = N->getOperand(0);
6301     DAG.computeKnownBits(Op, KnownZero, KnownOne);
6302     return true;
6303   }
6304 
6305   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
6306       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
6307     return false;
6308 
6309   SDValue Op0 = N->getOperand(0);
6310   SDValue Op1 = N->getOperand(1);
6311   assert(Op0.getValueType() == Op1.getValueType());
6312 
6313   if (isNullConstant(Op0))
6314     Op = Op1;
6315   else if (isNullConstant(Op1))
6316     Op = Op0;
6317   else
6318     return false;
6319 
6320   DAG.computeKnownBits(Op, KnownZero, KnownOne);
6321 
6322   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
6323     return false;
6324 
6325   return true;
6326 }
6327 
6328 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
6329   SDValue N0 = N->getOperand(0);
6330   EVT VT = N->getValueType(0);
6331 
6332   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6333                                               LegalOperations))
6334     return SDValue(Res, 0);
6335 
6336   // fold (zext (zext x)) -> (zext x)
6337   // fold (zext (aext x)) -> (zext x)
6338   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6339     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
6340                        N0.getOperand(0));
6341 
6342   // fold (zext (truncate x)) -> (zext x) or
6343   //      (zext (truncate x)) -> (truncate x)
6344   // This is valid when the truncated bits of x are already zero.
6345   // FIXME: We should extend this to work for vectors too.
6346   SDValue Op;
6347   APInt KnownZero;
6348   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
6349     APInt TruncatedBits =
6350       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
6351       APInt(Op.getValueSizeInBits(), 0) :
6352       APInt::getBitsSet(Op.getValueSizeInBits(),
6353                         N0.getValueSizeInBits(),
6354                         std::min(Op.getValueSizeInBits(),
6355                                  VT.getSizeInBits()));
6356     if (TruncatedBits == (KnownZero & TruncatedBits)) {
6357       if (VT.bitsGT(Op.getValueType()))
6358         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
6359       if (VT.bitsLT(Op.getValueType()))
6360         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6361 
6362       return Op;
6363     }
6364   }
6365 
6366   // fold (zext (truncate (load x))) -> (zext (smaller load x))
6367   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
6368   if (N0.getOpcode() == ISD::TRUNCATE) {
6369     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6370       SDNode* oye = N0.getNode()->getOperand(0).getNode();
6371       if (NarrowLoad.getNode() != N0.getNode()) {
6372         CombineTo(N0.getNode(), NarrowLoad);
6373         // CombineTo deleted the truncate, if needed, but not what's under it.
6374         AddToWorklist(oye);
6375       }
6376       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6377     }
6378   }
6379 
6380   // fold (zext (truncate x)) -> (and x, mask)
6381   if (N0.getOpcode() == ISD::TRUNCATE) {
6382     // fold (zext (truncate (load x))) -> (zext (smaller load x))
6383     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
6384     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6385       SDNode *oye = N0.getNode()->getOperand(0).getNode();
6386       if (NarrowLoad.getNode() != N0.getNode()) {
6387         CombineTo(N0.getNode(), NarrowLoad);
6388         // CombineTo deleted the truncate, if needed, but not what's under it.
6389         AddToWorklist(oye);
6390       }
6391       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6392     }
6393 
6394     EVT SrcVT = N0.getOperand(0).getValueType();
6395     EVT MinVT = N0.getValueType();
6396 
6397     // Try to mask before the extension to avoid having to generate a larger mask,
6398     // possibly over several sub-vectors.
6399     if (SrcVT.bitsLT(VT)) {
6400       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
6401                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
6402         SDValue Op = N0.getOperand(0);
6403         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6404         AddToWorklist(Op.getNode());
6405         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
6406       }
6407     }
6408 
6409     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
6410       SDValue Op = N0.getOperand(0);
6411       if (SrcVT.bitsLT(VT)) {
6412         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
6413         AddToWorklist(Op.getNode());
6414       } else if (SrcVT.bitsGT(VT)) {
6415         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6416         AddToWorklist(Op.getNode());
6417       }
6418       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6419     }
6420   }
6421 
6422   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
6423   // if either of the casts is not free.
6424   if (N0.getOpcode() == ISD::AND &&
6425       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6426       N0.getOperand(1).getOpcode() == ISD::Constant &&
6427       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6428                            N0.getValueType()) ||
6429        !TLI.isZExtFree(N0.getValueType(), VT))) {
6430     SDValue X = N0.getOperand(0).getOperand(0);
6431     if (X.getValueType().bitsLT(VT)) {
6432       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
6433     } else if (X.getValueType().bitsGT(VT)) {
6434       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6435     }
6436     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6437     Mask = Mask.zext(VT.getSizeInBits());
6438     SDLoc DL(N);
6439     return DAG.getNode(ISD::AND, DL, VT,
6440                        X, DAG.getConstant(Mask, DL, VT));
6441   }
6442 
6443   // fold (zext (load x)) -> (zext (truncate (zextload x)))
6444   // Only generate vector extloads when 1) they're legal, and 2) they are
6445   // deemed desirable by the target.
6446   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6447       ((!LegalOperations && !VT.isVector() &&
6448         !cast<LoadSDNode>(N0)->isVolatile()) ||
6449        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
6450     bool DoXform = true;
6451     SmallVector<SDNode*, 4> SetCCs;
6452     if (!N0.hasOneUse())
6453       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
6454     if (VT.isVector())
6455       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6456     if (DoXform) {
6457       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6458       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6459                                        LN0->getChain(),
6460                                        LN0->getBasePtr(), N0.getValueType(),
6461                                        LN0->getMemOperand());
6462       CombineTo(N, ExtLoad);
6463       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6464                                   N0.getValueType(), ExtLoad);
6465       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6466 
6467       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6468                       ISD::ZERO_EXTEND);
6469       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6470     }
6471   }
6472 
6473   // fold (zext (load x)) to multiple smaller zextloads.
6474   // Only on illegal but splittable vectors.
6475   if (SDValue ExtLoad = CombineExtLoad(N))
6476     return ExtLoad;
6477 
6478   // fold (zext (and/or/xor (load x), cst)) ->
6479   //      (and/or/xor (zextload x), (zext cst))
6480   // Unless (and (load x) cst) will match as a zextload already and has
6481   // additional users.
6482   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6483        N0.getOpcode() == ISD::XOR) &&
6484       isa<LoadSDNode>(N0.getOperand(0)) &&
6485       N0.getOperand(1).getOpcode() == ISD::Constant &&
6486       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
6487       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6488     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6489     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
6490       bool DoXform = true;
6491       SmallVector<SDNode*, 4> SetCCs;
6492       if (!N0.hasOneUse()) {
6493         if (N0.getOpcode() == ISD::AND) {
6494           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
6495           auto NarrowLoad = false;
6496           EVT LoadResultTy = AndC->getValueType(0);
6497           EVT ExtVT, LoadedVT;
6498           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
6499                                NarrowLoad))
6500             DoXform = false;
6501         }
6502         if (DoXform)
6503           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
6504                                             ISD::ZERO_EXTEND, SetCCs, TLI);
6505       }
6506       if (DoXform) {
6507         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
6508                                          LN0->getChain(), LN0->getBasePtr(),
6509                                          LN0->getMemoryVT(),
6510                                          LN0->getMemOperand());
6511         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6512         Mask = Mask.zext(VT.getSizeInBits());
6513         SDLoc DL(N);
6514         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6515                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
6516         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6517                                     SDLoc(N0.getOperand(0)),
6518                                     N0.getOperand(0).getValueType(), ExtLoad);
6519         CombineTo(N, And);
6520         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6521         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6522                         ISD::ZERO_EXTEND);
6523         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6524       }
6525     }
6526   }
6527 
6528   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
6529   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
6530   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6531       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6532     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6533     EVT MemVT = LN0->getMemoryVT();
6534     if ((!LegalOperations && !LN0->isVolatile()) ||
6535         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
6536       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6537                                        LN0->getChain(),
6538                                        LN0->getBasePtr(), MemVT,
6539                                        LN0->getMemOperand());
6540       CombineTo(N, ExtLoad);
6541       CombineTo(N0.getNode(),
6542                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
6543                             ExtLoad),
6544                 ExtLoad.getValue(1));
6545       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6546     }
6547   }
6548 
6549   if (N0.getOpcode() == ISD::SETCC) {
6550     // Only do this before legalize for now.
6551     if (!LegalOperations && VT.isVector() &&
6552         N0.getValueType().getVectorElementType() == MVT::i1) {
6553       EVT N00VT = N0.getOperand(0).getValueType();
6554       if (getSetCCResultType(N00VT) == N0.getValueType())
6555         return SDValue();
6556 
6557       // We know that the # elements of the results is the same as the #
6558       // elements of the compare (and the # elements of the compare result for
6559       // that matter). Check to see that they are the same size. If so, we know
6560       // that the element size of the sext'd result matches the element size of
6561       // the compare operands.
6562       SDLoc DL(N);
6563       SDValue VecOnes = DAG.getConstant(1, DL, VT);
6564       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
6565         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
6566         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
6567                                      N0.getOperand(1), N0.getOperand(2));
6568         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
6569       }
6570 
6571       // If the desired elements are smaller or larger than the source
6572       // elements we can use a matching integer vector type and then
6573       // truncate/sign extend.
6574       EVT MatchingElementType = EVT::getIntegerVT(
6575           *DAG.getContext(), N00VT.getScalarType().getSizeInBits());
6576       EVT MatchingVectorType = EVT::getVectorVT(
6577           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
6578       SDValue VsetCC =
6579           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
6580                       N0.getOperand(1), N0.getOperand(2));
6581       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
6582                          VecOnes);
6583     }
6584 
6585     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6586     SDLoc DL(N);
6587     if (SDValue SCC = SimplifySelectCC(
6588             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6589             DAG.getConstant(0, DL, VT),
6590             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6591       return SCC;
6592   }
6593 
6594   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
6595   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
6596       isa<ConstantSDNode>(N0.getOperand(1)) &&
6597       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
6598       N0.hasOneUse()) {
6599     SDValue ShAmt = N0.getOperand(1);
6600     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
6601     if (N0.getOpcode() == ISD::SHL) {
6602       SDValue InnerZExt = N0.getOperand(0);
6603       // If the original shl may be shifting out bits, do not perform this
6604       // transformation.
6605       unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
6606         InnerZExt.getOperand(0).getValueType().getSizeInBits();
6607       if (ShAmtVal > KnownZeroBits)
6608         return SDValue();
6609     }
6610 
6611     SDLoc DL(N);
6612 
6613     // Ensure that the shift amount is wide enough for the shifted value.
6614     if (VT.getSizeInBits() >= 256)
6615       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
6616 
6617     return DAG.getNode(N0.getOpcode(), DL, VT,
6618                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
6619                        ShAmt);
6620   }
6621 
6622   return SDValue();
6623 }
6624 
/// Combine an ISD::ANY_EXTEND node: constant-fold it, merge it with adjacent
/// extend/truncate nodes, fold it into (possibly narrowed) loads, and lower
/// any_extend-of-setcc into vector compares or select_cc where profitable.
/// Returns the replacement value, SDValue(N, 0) if N was updated in place via
/// CombineTo, or a null SDValue if no combine applied.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1 (also handles build_vectors of constants).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  // The inner extension's semantics win: extending again adds no information.
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x)) -> x, (truncate x), or (aext x) depending on
  // how the pre-truncate width of x compares to VT.
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    // Bring x to width VT first, then apply the (zero-extended) mask.
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when every user can be
    // rewritten to consume the extended value (setcc users are collected so
    // they can be updated below).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the old load see a truncate of the extload; the
      // chain users are redirected to the extload's chain result.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  // i.e. re-issue the existing extending load at the wider result type,
  // keeping its original extension kind.
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
6775 
6776 /// See if the specified operand can be simplified with the knowledge that only
6777 /// the bits specified by Mask are used.  If so, return the simpler operand,
6778 /// otherwise return a null SDValue.
6779 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
6780   switch (V.getOpcode()) {
6781   default: break;
6782   case ISD::Constant: {
6783     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
6784     assert(CV && "Const value should be ConstSDNode.");
6785     const APInt &CVal = CV->getAPIntValue();
6786     APInt NewVal = CVal & Mask;
6787     if (NewVal != CVal)
6788       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
6789     break;
6790   }
6791   case ISD::OR:
6792   case ISD::XOR:
6793     // If the LHS or RHS don't contribute bits to the or, drop them.
6794     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
6795       return V.getOperand(1);
6796     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
6797       return V.getOperand(0);
6798     break;
6799   case ISD::SRL:
6800     // Only look at single-use SRLs.
6801     if (!V.getNode()->hasOneUse())
6802       break;
6803     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
6804       // See if we can recursively simplify the LHS.
6805       unsigned Amt = RHSC->getZExtValue();
6806 
6807       // Watch out for shift count overflow though.
6808       if (Amt >= Mask.getBitWidth()) break;
6809       APInt NewMask = Mask << Amt;
6810       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
6811         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
6812                            SimplifyLHS, V.getOperand(1));
6813     }
6814   }
6815   return SDValue();
6816 }
6817 
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// of the narrower type, transform it to a narrower load from address + N /
/// (num bits of new type). If the result is to be extended, also fold the
/// extension in to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  // ExtType describes how the narrowed load must extend its result; ExtVT is
  // the narrow type actually loaded from memory.
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    // Treat the SRL node itself as N0 so the shift-amount handling below
    // applies to it; the narrow type is VT minus the constant shift amount.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  // ShAmt is the number of low bits skipped by a right shift feeding the
  // narrowing; it later becomes a byte offset into the loaded value.
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Give the target a veto over the narrowing.
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Convert the bit offset into a byte offset and a (possibly reduced)
  // alignment for the narrowed load.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               &Flags);
  AddToWorklist(NewPtr.getNode());

  // Build the narrowed load: a plain load when no extension is needed,
  // otherwise an extending load of ExtVT.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}
6993 
/// Combine an ISD::SIGN_EXTEND_INREG node: constant-fold it, drop it when the
/// input is already sufficiently sign-extended, merge it with adjacent
/// extensions, fold it into (possibly narrowed) loads, and recognize bswap
/// patterns in its operand.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately shadows the llvm::EVT type name; it holds
  // the narrow type being sign-extended from (operand 1 is a VTSDNode).
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // sext_in_reg of undef is undef.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  // (valid when the outer narrow type is strictly smaller: it subsumes the
  // inner one).
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Redirect all users of the old load (value and chain) to the sextload.
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
7100 
7101 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
7102   SDValue N0 = N->getOperand(0);
7103   EVT VT = N->getValueType(0);
7104 
7105   if (N0.isUndef())
7106     return DAG.getUNDEF(VT);
7107 
7108   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7109                                               LegalOperations))
7110     return SDValue(Res, 0);
7111 
7112   return SDValue();
7113 }
7114 
7115 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
7116   SDValue N0 = N->getOperand(0);
7117   EVT VT = N->getValueType(0);
7118 
7119   if (N0.isUndef())
7120     return DAG.getUNDEF(VT);
7121 
7122   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7123                                               LegalOperations))
7124     return SDValue(Res, 0);
7125 
7126   return SDValue();
7127 }
7128 
7129 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
7130   SDValue N0 = N->getOperand(0);
7131   EVT VT = N->getValueType(0);
7132   bool isLE = DAG.getDataLayout().isLittleEndian();
7133 
7134   // noop truncate
7135   if (N0.getValueType() == N->getValueType(0))
7136     return N0;
7137   // fold (truncate c1) -> c1
7138   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7139     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
7140   // fold (truncate (truncate x)) -> (truncate x)
7141   if (N0.getOpcode() == ISD::TRUNCATE)
7142     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7143   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
7144   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
7145       N0.getOpcode() == ISD::SIGN_EXTEND ||
7146       N0.getOpcode() == ISD::ANY_EXTEND) {
7147     // if the source is smaller than the dest, we still need an extend.
7148     if (N0.getOperand(0).getValueType().bitsLT(VT))
7149       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7150     // if the source is larger than the dest, than we just need the truncate.
7151     if (N0.getOperand(0).getValueType().bitsGT(VT))
7152       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7153     // if the source and dest are the same type, we can drop both the extend
7154     // and the truncate.
7155     return N0.getOperand(0);
7156   }
7157 
7158   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
7159   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
7160     return SDValue();
7161 
7162   // Fold extract-and-trunc into a narrow extract. For example:
7163   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
7164   //   i32 y = TRUNCATE(i64 x)
7165   //        -- becomes --
7166   //   v16i8 b = BITCAST (v2i64 val)
7167   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
7168   //
7169   // Note: We only run this optimization after type legalization (which often
7170   // creates this pattern) and before operation legalization after which
7171   // we need to be more careful about the vector instructions that we generate.
7172   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7173       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
7174 
7175     EVT VecTy = N0.getOperand(0).getValueType();
7176     EVT ExTy = N0.getValueType();
7177     EVT TrTy = N->getValueType(0);
7178 
7179     unsigned NumElem = VecTy.getVectorNumElements();
7180     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
7181 
7182     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
7183     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
7184 
7185     SDValue EltNo = N0->getOperand(1);
7186     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
7187       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
7188       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7189       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
7190 
7191       SDLoc DL(N);
7192       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
7193                          DAG.getBitcast(NVT, N0.getOperand(0)),
7194                          DAG.getConstant(Index, DL, IndexTy));
7195     }
7196   }
7197 
7198   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
7199   if (N0.getOpcode() == ISD::SELECT) {
7200     EVT SrcVT = N0.getValueType();
7201     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
7202         TLI.isTruncateFree(SrcVT, VT)) {
7203       SDLoc SL(N0);
7204       SDValue Cond = N0.getOperand(0);
7205       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
7206       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
7207       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
7208     }
7209   }
7210 
7211   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
7212   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7213       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
7214       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
7215     if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
7216       uint64_t Amt = CAmt->getZExtValue();
7217       unsigned Size = VT.getScalarSizeInBits();
7218 
7219       if (Amt < Size) {
7220         SDLoc SL(N);
7221         EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
7222 
7223         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
7224         return DAG.getNode(ISD::SHL, SL, VT, Trunc,
7225                            DAG.getConstant(Amt, SL, AmtVT));
7226       }
7227     }
7228   }
7229 
7230   // Fold a series of buildvector, bitcast, and truncate if possible.
7231   // For example fold
7232   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
7233   //   (2xi32 (buildvector x, y)).
7234   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
7235       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
7236       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
7237       N0.getOperand(0).hasOneUse()) {
7238 
7239     SDValue BuildVect = N0.getOperand(0);
7240     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
7241     EVT TruncVecEltTy = VT.getVectorElementType();
7242 
7243     // Check that the element types match.
7244     if (BuildVectEltTy == TruncVecEltTy) {
7245       // Now we only need to compute the offset of the truncated elements.
7246       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
7247       unsigned TruncVecNumElts = VT.getVectorNumElements();
7248       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
7249 
7250       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
7251              "Invalid number of elements");
7252 
7253       SmallVector<SDValue, 8> Opnds;
7254       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
7255         Opnds.push_back(BuildVect.getOperand(i));
7256 
7257       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
7258     }
7259   }
7260 
7261   // See if we can simplify the input to this truncate through knowledge that
7262   // only the low bits are being used.
7263   // For example "trunc (or (shl x, 8), y)" // -> trunc y
7264   // Currently we only perform this optimization on scalars because vectors
7265   // may have different active low bits.
7266   if (!VT.isVector()) {
7267     if (SDValue Shorter =
7268             GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
7269                                                      VT.getSizeInBits())))
7270       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
7271   }
7272   // fold (truncate (load x)) -> (smaller load x)
7273   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
7274   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
7275     if (SDValue Reduced = ReduceLoadWidth(N))
7276       return Reduced;
7277 
7278     // Handle the case where the load remains an extending load even
7279     // after truncation.
7280     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
7281       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7282       if (!LN0->isVolatile() &&
7283           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
7284         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
7285                                          VT, LN0->getChain(), LN0->getBasePtr(),
7286                                          LN0->getMemoryVT(),
7287                                          LN0->getMemOperand());
7288         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
7289         return NewLoad;
7290       }
7291     }
7292   }
7293   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
7294   // where ... are all 'undef'.
7295   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
7296     SmallVector<EVT, 8> VTs;
7297     SDValue V;
7298     unsigned Idx = 0;
7299     unsigned NumDefs = 0;
7300 
7301     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
7302       SDValue X = N0.getOperand(i);
7303       if (!X.isUndef()) {
7304         V = X;
7305         Idx = i;
7306         NumDefs++;
7307       }
7308       // Stop if more than one members are non-undef.
7309       if (NumDefs > 1)
7310         break;
7311       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
7312                                      VT.getVectorElementType(),
7313                                      X.getValueType().getVectorNumElements()));
7314     }
7315 
7316     if (NumDefs == 0)
7317       return DAG.getUNDEF(VT);
7318 
7319     if (NumDefs == 1) {
7320       assert(V.getNode() && "The single defined operand is empty!");
7321       SmallVector<SDValue, 8> Opnds;
7322       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
7323         if (i != Idx) {
7324           Opnds.push_back(DAG.getUNDEF(VTs[i]));
7325           continue;
7326         }
7327         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
7328         AddToWorklist(NV.getNode());
7329         Opnds.push_back(NV);
7330       }
7331       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
7332     }
7333   }
7334 
7335   // Fold truncate of a bitcast of a vector to an extract of the low vector
7336   // element.
7337   //
7338   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
7339   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
7340     SDValue VecSrc = N0.getOperand(0);
7341     EVT SrcVT = VecSrc.getValueType();
7342     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
7343         (!LegalOperations ||
7344          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
7345       SDLoc SL(N);
7346 
7347       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7348       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
7349                          VecSrc, DAG.getConstant(0, SL, IdxVT));
7350     }
7351   }
7352 
7353   // Simplify the operands using demanded-bits information.
7354   if (!VT.isVector() &&
7355       SimplifyDemandedBits(SDValue(N, 0)))
7356     return SDValue(N, 0);
7357 
7358   return SDValue();
7359 }
7360 
7361 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
7362   SDValue Elt = N->getOperand(i);
7363   if (Elt.getOpcode() != ISD::MERGE_VALUES)
7364     return Elt.getNode();
7365   return Elt.getOperand(Elt.getResNo()).getNode();
7366 }
7367 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Both halves must be simple (non-extending, single-use) loads from the
  // same address space; LD1's checks happen here, LD2's below.
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
  // LD2 must start exactly LD1Bytes past LD1 (and neither may be volatile)
  // so that the pair covers one contiguous VT-sized memory region.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only fold if the wide load's ABI alignment is already satisfied by the
    // first load's alignment, and the wide load is legal when we are past
    // operation legalization.
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
7394 
7395 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
7396   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
7397   // and Lo parts; on big-endian machines it doesn't.
7398   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
7399 }
7400 
7401 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
7402                                     const TargetLowering &TLI) {
7403   // If this is not a bitcast to an FP type or if the target doesn't have
7404   // IEEE754-compliant FP logic, we're done.
7405   EVT VT = N->getValueType(0);
7406   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
7407     return SDValue();
7408 
7409   // TODO: Use splat values for the constant-checking below and remove this
7410   // restriction.
7411   SDValue N0 = N->getOperand(0);
7412   EVT SourceVT = N0.getValueType();
7413   if (SourceVT.isVector())
7414     return SDValue();
7415 
7416   unsigned FPOpcode;
7417   APInt SignMask;
7418   switch (N0.getOpcode()) {
7419   case ISD::AND:
7420     FPOpcode = ISD::FABS;
7421     SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
7422     break;
7423   case ISD::XOR:
7424     FPOpcode = ISD::FNEG;
7425     SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
7426     break;
7427   // TODO: ISD::OR --> ISD::FNABS?
7428   default:
7429     return SDValue();
7430   }
7431 
7432   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
7433   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
7434   SDValue LogicOp0 = N0.getOperand(0);
7435   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7436   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
7437       LogicOp0.getOpcode() == ISD::BITCAST &&
7438       LogicOp0->getOperand(0).getValueType() == VT)
7439     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
7440 
7441   return SDValue();
7442 }
7443 
/// Combine step for ISD::BITCAST. Constant-folds bitcasts of constant
/// BUILD_VECTORs and scalar constants, collapses chained bitcasts, converts a
/// bitcast of a load into a load of the new type, rewrites fneg/fabs/fcopysign
/// into integer sign-bit logic, merges consecutive loads under a BUILD_PAIR,
/// and removes redundant double bitcasts around vector shuffles.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> fp conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getBitcast(VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only fold if the target reports the access as both allowed and fast at
    // the original load's alignment.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect all chain users of the old load to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 case: build the 128-bit flip mask as a BUILD_PAIR of two
    // identical i64 halves instead of one wide constant.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // For fabs, flip the sign bit only if it was set: extract the Hi
        // half and AND it with the sign mask.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128: compute the per-half flip bit from the XOR of the two
      // bitcast values, then apply it to both halves via BUILD_PAIR.
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: take the sign bit from X and the rest from the constant.
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    // Returns an empty SDValue when neither form matches.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // Widen the shuffle mask: each source element maps to MaskScale
    // consecutive destination elements.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the mask isn't legal as-is, try the commuted form before giving up.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
7688 
7689 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
7690   EVT VT = N->getValueType(0);
7691   return CombineConsecutiveLoads(N, VT);
7692 }
7693 
7694 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
7695 /// operands. DstEltVT indicates the destination element value type.
7696 SDValue DAGCombiner::
7697 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
7698   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
7699 
7700   // If this is already the right type, we're done.
7701   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
7702 
7703   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
7704   unsigned DstBitSize = DstEltVT.getSizeInBits();
7705 
7706   // If this is a conversion of N elements of one type to N elements of another
7707   // type, convert each element.  This handles FP<->INT cases.
7708   if (SrcBitSize == DstBitSize) {
7709     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7710                               BV->getValueType(0).getVectorNumElements());
7711 
7712     // Due to the FP element handling below calling this routine recursively,
7713     // we can end up with a scalar-to-vector node here.
7714     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
7715       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
7716                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
7717 
7718     SmallVector<SDValue, 8> Ops;
7719     for (SDValue Op : BV->op_values()) {
7720       // If the vector element type is not legal, the BUILD_VECTOR operands
7721       // are promoted and implicitly truncated.  Make that explicit here.
7722       if (Op.getValueType() != SrcEltVT)
7723         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
7724       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
7725       AddToWorklist(Ops.back().getNode());
7726     }
7727     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
7728   }
7729 
7730   // Otherwise, we're growing or shrinking the elements.  To avoid having to
7731   // handle annoying details of growing/shrinking FP values, we convert them to
7732   // int first.
7733   if (SrcEltVT.isFloatingPoint()) {
7734     // Convert the input float vector to a int vector where the elements are the
7735     // same sizes.
7736     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
7737     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
7738     SrcEltVT = IntVT;
7739   }
7740 
7741   // Now we know the input is an integer vector.  If the output is a FP type,
7742   // convert to integer first, then to FP of the right size.
7743   if (DstEltVT.isFloatingPoint()) {
7744     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
7745     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
7746 
7747     // Next, convert to FP elements of the same size.
7748     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
7749   }
7750 
7751   SDLoc DL(BV);
7752 
7753   // Okay, we know the src/dst types are both integers of differing types.
7754   // Handling growing first.
7755   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
7756   if (SrcBitSize < DstBitSize) {
7757     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
7758 
7759     SmallVector<SDValue, 8> Ops;
7760     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
7761          i += NumInputsPerOutput) {
7762       bool isLE = DAG.getDataLayout().isLittleEndian();
7763       APInt NewBits = APInt(DstBitSize, 0);
7764       bool EltIsUndef = true;
7765       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
7766         // Shift the previously computed bits over.
7767         NewBits <<= SrcBitSize;
7768         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
7769         if (Op.isUndef()) continue;
7770         EltIsUndef = false;
7771 
7772         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
7773                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
7774       }
7775 
7776       if (EltIsUndef)
7777         Ops.push_back(DAG.getUNDEF(DstEltVT));
7778       else
7779         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
7780     }
7781 
7782     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
7783     return DAG.getBuildVector(VT, DL, Ops);
7784   }
7785 
7786   // Finally, this must be the case where we are shrinking elements: each input
7787   // turns into multiple outputs.
7788   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
7789   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7790                             NumOutputsPerInput*BV->getNumOperands());
7791   SmallVector<SDValue, 8> Ops;
7792 
7793   for (const SDValue &Op : BV->op_values()) {
7794     if (Op.isUndef()) {
7795       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
7796       continue;
7797     }
7798 
7799     APInt OpVal = cast<ConstantSDNode>(Op)->
7800                   getAPIntValue().zextOrTrunc(SrcBitSize);
7801 
7802     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
7803       APInt ThisVal = OpVal.trunc(DstBitSize);
7804       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
7805       OpVal = OpVal.lshr(DstBitSize);
7806     }
7807 
7808     // For big endian targets, swap the order of the pieces of each element.
7809     if (DAG.getDataLayout().isBigEndian())
7810       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
7811   }
7812 
7813   return DAG.getBuildVector(VT, DL, Ops);
7814 }
7815 
7816 /// Try to perform FMA combining on a given FADD node.
7817 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
7818   SDValue N0 = N->getOperand(0);
7819   SDValue N1 = N->getOperand(1);
7820   EVT VT = N->getValueType(0);
7821   SDLoc SL(N);
7822 
7823   const TargetOptions &Options = DAG.getTarget().Options;
7824   bool AllowFusion =
7825       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
7826 
7827   // Floating-point multiply-add with intermediate rounding.
7828   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
7829 
7830   // Floating-point multiply-add without intermediate rounding.
7831   bool HasFMA =
7832       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
7833       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
7834 
7835   // No valid opcode, do not combine.
7836   if (!HasFMAD && !HasFMA)
7837     return SDValue();
7838 
7839   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
7840   ;
7841   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
7842     return SDValue();
7843 
7844   // Always prefer FMAD to FMA for precision.
7845   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
7846   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
7847   bool LookThroughFPExt = TLI.isFPExtFree(VT);
7848 
7849   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
7850   // prefer to fold the multiply with fewer uses.
7851   if (Aggressive && N0.getOpcode() == ISD::FMUL &&
7852       N1.getOpcode() == ISD::FMUL) {
7853     if (N0.getNode()->use_size() > N1.getNode()->use_size())
7854       std::swap(N0, N1);
7855   }
7856 
7857   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
7858   if (N0.getOpcode() == ISD::FMUL &&
7859       (Aggressive || N0->hasOneUse())) {
7860     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7861                        N0.getOperand(0), N0.getOperand(1), N1);
7862   }
7863 
7864   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
7865   // Note: Commutes FADD operands.
7866   if (N1.getOpcode() == ISD::FMUL &&
7867       (Aggressive || N1->hasOneUse())) {
7868     return DAG.getNode(PreferredFusedOpcode, SL, VT,
7869                        N1.getOperand(0), N1.getOperand(1), N0);
7870   }
7871 
7872   // Look through FP_EXTEND nodes to do more combining.
7873   if (AllowFusion && LookThroughFPExt) {
7874     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
7875     if (N0.getOpcode() == ISD::FP_EXTEND) {
7876       SDValue N00 = N0.getOperand(0);
7877       if (N00.getOpcode() == ISD::FMUL)
7878         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7879                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7880                                        N00.getOperand(0)),
7881                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7882                                        N00.getOperand(1)), N1);
7883     }
7884 
7885     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
7886     // Note: Commutes FADD operands.
7887     if (N1.getOpcode() == ISD::FP_EXTEND) {
7888       SDValue N10 = N1.getOperand(0);
7889       if (N10.getOpcode() == ISD::FMUL)
7890         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7891                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7892                                        N10.getOperand(0)),
7893                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
7894                                        N10.getOperand(1)), N0);
7895     }
7896   }
7897 
7898   // More folding opportunities when target permits.
7899   if ((AllowFusion || HasFMAD)  && Aggressive) {
7900     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
7901     if (N0.getOpcode() == PreferredFusedOpcode &&
7902         N0.getOperand(2).getOpcode() == ISD::FMUL) {
7903       return DAG.getNode(PreferredFusedOpcode, SL, VT,
7904                          N0.getOperand(0), N0.getOperand(1),
7905                          DAG.getNode(PreferredFusedOpcode, SL, VT,
7906                                      N0.getOperand(2).getOperand(0),
7907                                      N0.getOperand(2).getOperand(1),
7908                                      N1));
7909     }
7910 
7911     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
7912     if (N1->getOpcode() == PreferredFusedOpcode &&
7913         N1.getOperand(2).getOpcode() == ISD::FMUL) {
7914       return DAG.getNode(PreferredFusedOpcode, SL, VT,
7915                          N1.getOperand(0), N1.getOperand(1),
7916                          DAG.getNode(PreferredFusedOpcode, SL, VT,
7917                                      N1.getOperand(2).getOperand(0),
7918                                      N1.getOperand(2).getOperand(1),
7919                                      N0));
7920     }
7921 
7922     if (AllowFusion && LookThroughFPExt) {
7923       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
7924       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
7925       auto FoldFAddFMAFPExtFMul = [&] (
7926           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
7927         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
7928                            DAG.getNode(PreferredFusedOpcode, SL, VT,
7929                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
7930                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
7931                                        Z));
7932       };
7933       if (N0.getOpcode() == PreferredFusedOpcode) {
7934         SDValue N02 = N0.getOperand(2);
7935         if (N02.getOpcode() == ISD::FP_EXTEND) {
7936           SDValue N020 = N02.getOperand(0);
7937           if (N020.getOpcode() == ISD::FMUL)
7938             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
7939                                         N020.getOperand(0), N020.getOperand(1),
7940                                         N1);
7941         }
7942       }
7943 
7944       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
7945       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
7946       // FIXME: This turns two single-precision and one double-precision
7947       // operation into two double-precision operations, which might not be
7948       // interesting for all targets, especially GPUs.
7949       auto FoldFAddFPExtFMAFMul = [&] (
7950           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
7951         return DAG.getNode(PreferredFusedOpcode, SL, VT,
7952                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
7953                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
7954                            DAG.getNode(PreferredFusedOpcode, SL, VT,
7955                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
7956                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
7957                                        Z));
7958       };
7959       if (N0.getOpcode() == ISD::FP_EXTEND) {
7960         SDValue N00 = N0.getOperand(0);
7961         if (N00.getOpcode() == PreferredFusedOpcode) {
7962           SDValue N002 = N00.getOperand(2);
7963           if (N002.getOpcode() == ISD::FMUL)
7964             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
7965                                         N002.getOperand(0), N002.getOperand(1),
7966                                         N1);
7967         }
7968       }
7969 
7970       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
7971       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
7972       if (N1.getOpcode() == PreferredFusedOpcode) {
7973         SDValue N12 = N1.getOperand(2);
7974         if (N12.getOpcode() == ISD::FP_EXTEND) {
7975           SDValue N120 = N12.getOperand(0);
7976           if (N120.getOpcode() == ISD::FMUL)
7977             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
7978                                         N120.getOperand(0), N120.getOperand(1),
7979                                         N0);
7980         }
7981       }
7982 
7983       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
7984       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
7985       // FIXME: This turns two single-precision and one double-precision
7986       // operation into two double-precision operations, which might not be
7987       // interesting for all targets, especially GPUs.
7988       if (N1.getOpcode() == ISD::FP_EXTEND) {
7989         SDValue N10 = N1.getOperand(0);
7990         if (N10.getOpcode() == PreferredFusedOpcode) {
7991           SDValue N102 = N10.getOperand(2);
7992           if (N102.getOpcode() == ISD::FMUL)
7993             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
7994                                         N102.getOperand(0), N102.getOperand(1),
7995                                         N0);
7996         }
7997       }
7998     }
7999   }
8000 
8001   return SDValue();
8002 }
8003 
/// Try to perform FMA combining on a given FSUB node.
///
/// Attempts to rewrite (fsub ...) trees that contain an FMUL (possibly behind
/// FP_EXTEND and/or FNEG nodes) into fused multiply-add nodes, choosing FMAD
/// over FMA when the former is legal. Returns the replacement value, or an
/// empty SDValue if no fold applies. Note the pattern checks below are
/// order-sensitive: each fold returns immediately, so earlier (cheaper)
/// patterns take precedence over the more aggressive ones.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  // Fusion is globally allowed with -fp-contract=fast or unsafe FP math.
  const TargetOptions &Options = DAG.getTarget().Options;
  bool AllowFusion =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Defer to the machine combiner when the target prefers to form FMAs there.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  // When the target reports fpext as free, we may also match multiplies that
  // sit behind FP_EXTEND nodes (see the AllowFusion && LookThroughFPExt
  // sections below).
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (N0.getOpcode() == ISD::FMUL &&
      (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FMUL &&
      (Aggressive || N1->hasOneUse()))
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0);

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG &&
      N0.getOperand(0).getOpcode() == ISD::FMUL &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (AllowFusion && LookThroughFPExt) {
    // fold (fsub (fpext (fmul x, y)), z)
    //   -> (fma (fpext x), (fpext y), (fneg z))
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)),
                           DAG.getNode(ISD::FNEG, SL, VT, N1));
    }

    // fold (fsub x, (fpext (fmul y, z)))
    //   -> (fma (fneg (fpext y)), (fpext z), x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N10.getOperand(0))),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)),
                           N0);
    }

    // fold (fsub (fpext (fneg (fmul, x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However,
    // the orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math
    // prevent from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FNEG) {
        SDValue N000 = N00.getOperand(0);
        if (N000.getOpcode() == ISD::FMUL) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }

    // fold (fsub (fneg (fpext (fmul, x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However,
    // the orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math
    // prevent from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FNEG) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FP_EXTEND) {
        SDValue N000 = N00.getOperand(0);
        if (N000.getOpcode() == ISD::FMUL) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }

  }

  // More folding opportunities when target permits. These nest a second fused
  // node inside the first, so they are only tried under aggressive fusion.
  if ((AllowFusion || HasFMAD) && Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1)));
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),

                                     N21, N0));
    }

    if (AllowFusion && LookThroughFPExt) {
      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (N020.getOpcode() == ISD::FMUL)
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               N0.getOperand(0), N0.getOperand(1),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y),
      //           (fma (fpext u), (fpext v), (fneg z)))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (N002.getOpcode() == ISD::FMUL)
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(0)),
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(1)),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N1.getOperand(2).getOperand(0);
        if (N120.getOpcode() == ISD::FMUL) {
          SDValue N1200 = N120.getOperand(0);
          SDValue N1201 = N120.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                             N1.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                             DAG.getNode(ISD::FP_EXTEND, SL,
                                                         VT, N1200)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1201),
                                         N0));
        }
      }

      // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
      //   -> (fma (fneg (fpext y)), (fpext z),
      //           (fma (fneg (fpext u)), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
        SDValue N100 = N1.getOperand(0).getOperand(0);
        SDValue N101 = N1.getOperand(0).getOperand(1);
        SDValue N102 = N1.getOperand(0).getOperand(2);
        if (N102.getOpcode() == ISD::FMUL) {
          SDValue N1020 = N102.getOperand(0);
          SDValue N1021 = N102.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N100)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                             DAG.getNode(ISD::FP_EXTEND, SL,
                                                         VT, N1020)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1021),
                                         N0));
        }
      }
    }
  }

  // No fold applied.
  return SDValue();
}
8271 
8272 /// Try to perform FMA combining on a given FMUL node.
8273 SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
8274   SDValue N0 = N->getOperand(0);
8275   SDValue N1 = N->getOperand(1);
8276   EVT VT = N->getValueType(0);
8277   SDLoc SL(N);
8278 
8279   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
8280 
8281   const TargetOptions &Options = DAG.getTarget().Options;
8282   bool AllowFusion =
8283       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8284 
8285   // Floating-point multiply-add with intermediate rounding.
8286   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8287 
8288   // Floating-point multiply-add without intermediate rounding.
8289   bool HasFMA =
8290       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8291       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8292 
8293   // No valid opcode, do not combine.
8294   if (!HasFMAD && !HasFMA)
8295     return SDValue();
8296 
8297   // Always prefer FMAD to FMA for precision.
8298   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8299   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8300 
8301   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
8302   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
8303   auto FuseFADD = [&](SDValue X, SDValue Y) {
8304     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
8305       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8306       if (XC1 && XC1->isExactlyValue(+1.0))
8307         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8308       if (XC1 && XC1->isExactlyValue(-1.0))
8309         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8310                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8311     }
8312     return SDValue();
8313   };
8314 
8315   if (SDValue FMA = FuseFADD(N0, N1))
8316     return FMA;
8317   if (SDValue FMA = FuseFADD(N1, N0))
8318     return FMA;
8319 
8320   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
8321   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
8322   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
8323   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
8324   auto FuseFSUB = [&](SDValue X, SDValue Y) {
8325     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
8326       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
8327       if (XC0 && XC0->isExactlyValue(+1.0))
8328         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8329                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8330                            Y);
8331       if (XC0 && XC0->isExactlyValue(-1.0))
8332         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8333                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8334                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8335 
8336       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8337       if (XC1 && XC1->isExactlyValue(+1.0))
8338         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8339                            DAG.getNode(ISD::FNEG, SL, VT, Y));
8340       if (XC1 && XC1->isExactlyValue(-1.0))
8341         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8342     }
8343     return SDValue();
8344   };
8345 
8346   if (SDValue FMA = FuseFSUB(N0, N1))
8347     return FMA;
8348   if (SDValue FMA = FuseFSUB(N1, N0))
8349     return FMA;
8350 
8351   return SDValue();
8352 }
8353 
/// Combine an FADD node: constant folding, constant canonicalization,
/// fneg-based rewrites to FSUB, a set of unsafe-math reassociations, and
/// finally FMA formation via visitFADDForFMACombine.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Carry the node's fast-math flags through to every fold we build below.
  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // NOTE(review): the "== 2" result of isNegatibleForFree appears to mean the
  // negation is strictly profitable (not merely possible) — confirm against
  // the helper's definition elsewhere in this file.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
      if (N1C->isZero())
        return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
                                     Flags),
                         Flags);

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        // The !CFP00 guard ensures the fmul's LHS is non-constant, so the new
        // fmul cannot immediately re-trigger this same fold.
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      // Mirror images of the two folds above, with the fmul on the RHS.
      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
8501 
8502 SDValue DAGCombiner::visitFSUB(SDNode *N) {
8503   SDValue N0 = N->getOperand(0);
8504   SDValue N1 = N->getOperand(1);
8505   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
8506   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8507   EVT VT = N->getValueType(0);
8508   SDLoc dl(N);
8509   const TargetOptions &Options = DAG.getTarget().Options;
8510   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8511 
8512   // fold vector ops
8513   if (VT.isVector())
8514     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8515       return FoldedVOp;
8516 
8517   // fold (fsub c1, c2) -> c1-c2
8518   if (N0CFP && N1CFP)
8519     return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
8520 
8521   // fold (fsub A, (fneg B)) -> (fadd A, B)
8522   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8523     return DAG.getNode(ISD::FADD, dl, VT, N0,
8524                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8525 
8526   // If 'unsafe math' is enabled, fold lots of things.
8527   if (Options.UnsafeFPMath) {
8528     // (fsub A, 0) -> A
8529     if (N1CFP && N1CFP->isZero())
8530       return N0;
8531 
8532     // (fsub 0, B) -> -B
8533     if (N0CFP && N0CFP->isZero()) {
8534       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8535         return GetNegatedExpression(N1, DAG, LegalOperations);
8536       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8537         return DAG.getNode(ISD::FNEG, dl, VT, N1);
8538     }
8539 
8540     // (fsub x, x) -> 0.0
8541     if (N0 == N1)
8542       return DAG.getConstantFP(0.0f, dl, VT);
8543 
8544     // (fsub x, (fadd x, y)) -> (fneg y)
8545     // (fsub x, (fadd y, x)) -> (fneg y)
8546     if (N1.getOpcode() == ISD::FADD) {
8547       SDValue N10 = N1->getOperand(0);
8548       SDValue N11 = N1->getOperand(1);
8549 
8550       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
8551         return GetNegatedExpression(N11, DAG, LegalOperations);
8552 
8553       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
8554         return GetNegatedExpression(N10, DAG, LegalOperations);
8555     }
8556   }
8557 
8558   // FSUB -> FMA combines:
8559   if (SDValue Fused = visitFSUBForFMACombine(N)) {
8560     AddToWorklist(Fused.getNode());
8561     return Fused;
8562   }
8563 
8564   return SDValue();
8565 }
8566 
/// Combine an FMUL node: constant folding, constant canonicalization,
/// identity folds (*1.0, *2.0, *-1.0), unsafe-math constant reassociation,
/// double-fneg elimination, and finally FMA formation via
/// visitFMULForFMACombine.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Carry this node's fast-math flags through to every fold below.
  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // (Unsafe: ignores the sign of zero and NaN/Inf inputs.)
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD &&
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
8669 
8670 SDValue DAGCombiner::visitFMA(SDNode *N) {
8671   SDValue N0 = N->getOperand(0);
8672   SDValue N1 = N->getOperand(1);
8673   SDValue N2 = N->getOperand(2);
8674   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8675   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8676   EVT VT = N->getValueType(0);
8677   SDLoc dl(N);
8678   const TargetOptions &Options = DAG.getTarget().Options;
8679 
8680   // Constant fold FMA.
8681   if (isa<ConstantFPSDNode>(N0) &&
8682       isa<ConstantFPSDNode>(N1) &&
8683       isa<ConstantFPSDNode>(N2)) {
8684     return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
8685   }
8686 
8687   if (Options.UnsafeFPMath) {
8688     if (N0CFP && N0CFP->isZero())
8689       return N2;
8690     if (N1CFP && N1CFP->isZero())
8691       return N2;
8692   }
8693   // TODO: The FMA node should have flags that propagate to these nodes.
8694   if (N0CFP && N0CFP->isExactlyValue(1.0))
8695     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
8696   if (N1CFP && N1CFP->isExactlyValue(1.0))
8697     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
8698 
8699   // Canonicalize (fma c, x, y) -> (fma x, c, y)
8700   if (isConstantFPBuildVectorOrConstantFP(N0) &&
8701      !isConstantFPBuildVectorOrConstantFP(N1))
8702     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
8703 
8704   // TODO: FMA nodes should have flags that propagate to the created nodes.
8705   // For now, create a Flags object for use with all unsafe math transforms.
8706   SDNodeFlags Flags;
8707   Flags.setUnsafeAlgebra(true);
8708 
8709   if (Options.UnsafeFPMath) {
8710     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
8711     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
8712         isConstantFPBuildVectorOrConstantFP(N1) &&
8713         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
8714       return DAG.getNode(ISD::FMUL, dl, VT, N0,
8715                          DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
8716                                      &Flags), &Flags);
8717     }
8718 
8719     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
8720     if (N0.getOpcode() == ISD::FMUL &&
8721         isConstantFPBuildVectorOrConstantFP(N1) &&
8722         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
8723       return DAG.getNode(ISD::FMA, dl, VT,
8724                          N0.getOperand(0),
8725                          DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
8726                                      &Flags),
8727                          N2);
8728     }
8729   }
8730 
8731   // (fma x, 1, y) -> (fadd x, y)
8732   // (fma x, -1, y) -> (fadd (fneg x), y)
8733   if (N1CFP) {
8734     if (N1CFP->isExactlyValue(1.0))
8735       // TODO: The FMA node should have flags that propagate to this node.
8736       return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
8737 
8738     if (N1CFP->isExactlyValue(-1.0) &&
8739         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
8740       SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
8741       AddToWorklist(RHSNeg.getNode());
8742       // TODO: The FMA node should have flags that propagate to this node.
8743       return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
8744     }
8745   }
8746 
8747   if (Options.UnsafeFPMath) {
8748     // (fma x, c, x) -> (fmul x, (c+1))
8749     if (N1CFP && N0 == N2) {
8750     return DAG.getNode(ISD::FMUL, dl, VT, N0,
8751                          DAG.getNode(ISD::FADD, dl, VT,
8752                                      N1, DAG.getConstantFP(1.0, dl, VT),
8753                                      &Flags), &Flags);
8754     }
8755 
8756     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
8757     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
8758       return DAG.getNode(ISD::FMUL, dl, VT, N0,
8759                          DAG.getNode(ISD::FADD, dl, VT,
8760                                      N1, DAG.getConstantFP(-1.0, dl, VT),
8761                                      &Flags), &Flags);
8762     }
8763   }
8764 
8765   return SDValue();
8766 }
8767 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different target
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // The transform changes rounding, so it needs either global unsafe math or
  // per-node reciprocal permission.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags *Flags = N->getFlags();
  // NOTE(review): Flags is dereferenced unconditionally; this assumes every
  // FDIV node carries flags -- confirm N->getFlags() cannot return null here.
  if (!UnsafeMath && !Flags->hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal (1.0 / D) already -- rewriting it as
  // 1.0 * (1.0 / D) would be pointless.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    // Require N1 as the divisor (operand 1), not the dividend.
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
8831 
/// Visit an ISD::FDIV node: constant-fold, turn division by a constant into a
/// multiply by its reciprocal (unsafe math only), form rsqrt/reciprocal
/// estimates, strip paired negations, and merge repeated divisors.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    // X / sqrt(Y) -> X * rsqrt(Y)
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpext(sqrt(Y)) -> X * fpext(rsqrt(Y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpround(sqrt(Y)) -> X * fpround(rsqrt(Y)), keeping the original
      // truncation flag (operand 1 of the FP_ROUND).
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // Rewrite multiple divisions by the same divisor as multiplications by its
  // reciprocal (see combineRepeatedFPDivisors above).
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
8941 
8942 SDValue DAGCombiner::visitFREM(SDNode *N) {
8943   SDValue N0 = N->getOperand(0);
8944   SDValue N1 = N->getOperand(1);
8945   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8946   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8947   EVT VT = N->getValueType(0);
8948 
8949   // fold (frem c1, c2) -> fmod(c1,c2)
8950   if (N0CFP && N1CFP)
8951     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
8952                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
8953 
8954   return SDValue();
8955 }
8956 
8957 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
8958   if (!DAG.getTarget().Options.UnsafeFPMath)
8959     return SDValue();
8960 
8961   SDValue N0 = N->getOperand(0);
8962   if (TLI.isFsqrtCheap(N0, DAG))
8963     return SDValue();
8964 
8965   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
8966   // For now, create a Flags object for use with all unsafe math transforms.
8967   SDNodeFlags Flags;
8968   Flags.setUnsafeAlgebra(true);
8969   return buildSqrtEstimate(N0, &Flags);
8970 }
8971 
8972 /// copysign(x, fp_extend(y)) -> copysign(x, y)
8973 /// copysign(x, fp_round(y)) -> copysign(x, y)
8974 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
8975   SDValue N1 = N->getOperand(1);
8976   if ((N1.getOpcode() == ISD::FP_EXTEND ||
8977        N1.getOpcode() == ISD::FP_ROUND)) {
8978     // Do not optimize out type conversion of f128 type yet.
8979     // For some targets like x86_64, configuration is changed to keep one f128
8980     // value in one SSE register, but instruction selection cannot handle
8981     // FCOPYSIGN on SSE registers yet.
8982     EVT N1VT = N1->getValueType(0);
8983     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
8984     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
8985   }
8986   return false;
8987 }
8988 
8989 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
8990   SDValue N0 = N->getOperand(0);
8991   SDValue N1 = N->getOperand(1);
8992   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8993   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8994   EVT VT = N->getValueType(0);
8995 
8996   if (N0CFP && N1CFP)  // Constant fold
8997     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
8998 
8999   if (N1CFP) {
9000     const APFloat& V = N1CFP->getValueAPF();
9001     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
9002     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
9003     if (!V.isNegative()) {
9004       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
9005         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9006     } else {
9007       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9008         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9009                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
9010     }
9011   }
9012 
9013   // copysign(fabs(x), y) -> copysign(x, y)
9014   // copysign(fneg(x), y) -> copysign(x, y)
9015   // copysign(copysign(x,z), y) -> copysign(x, y)
9016   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
9017       N0.getOpcode() == ISD::FCOPYSIGN)
9018     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
9019                        N0.getOperand(0), N1);
9020 
9021   // copysign(x, abs(y)) -> abs(x)
9022   if (N1.getOpcode() == ISD::FABS)
9023     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9024 
9025   // copysign(x, copysign(y,z)) -> copysign(x, z)
9026   if (N1.getOpcode() == ISD::FCOPYSIGN)
9027     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
9028                        N0, N1.getOperand(1));
9029 
9030   // copysign(x, fp_extend(y)) -> copysign(x, y)
9031   // copysign(x, fp_round(y)) -> copysign(x, y)
9032   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
9033     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
9034                        N0, N1.getOperand(0));
9035 
9036   return SDValue();
9037 }
9038 
/// Visit an ISD::SINT_TO_FP node: constant-fold, switch to UINT_TO_FP when the
/// sign bit is known zero and only the unsigned form is legal, and turn
/// conversions of setcc results into select_cc of FP constants.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    // A true i1 setcc is -1 when sign-extended, hence the -1.0 constant.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    // Here the setcc result is zero-extended, so true converts to 1.0.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
9092 
9093 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
9094   SDValue N0 = N->getOperand(0);
9095   EVT VT = N->getValueType(0);
9096   EVT OpVT = N0.getValueType();
9097 
9098   // fold (uint_to_fp c1) -> c1fp
9099   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9100       // ...but only if the target supports immediate floating-point values
9101       (!LegalOperations ||
9102        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9103     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9104 
9105   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
9106   // but SINT_TO_FP is legal on this target, try to convert.
9107   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
9108       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
9109     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
9110     if (DAG.SignBitIsZero(N0))
9111       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9112   }
9113 
9114   // The next optimizations are desirable only if SELECT_CC can be lowered.
9115   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9116     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
9117 
9118     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
9119         (!LegalOperations ||
9120          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9121       SDLoc DL(N);
9122       SDValue Ops[] =
9123         { N0.getOperand(0), N0.getOperand(1),
9124           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9125           N0.getOperand(2) };
9126       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9127     }
9128   }
9129 
9130   return SDValue();
9131 }
9132 
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
// Eliminates the int->fp->int round trip when the float type can represent
// the full integer range exactly, leaving only an integer width change.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Only applies when the fp value came directly from an int conversion.
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // Signed types spend one bit on the sign, so they represent one fewer
  // magnitude bit than their width -- hence the subtraction.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: only sign-extend when both sides are signed; otherwise the
      // value is known non-negative, so zero-extend.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same width: a bitcast covers any int<->int type mismatch.
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
9174 
9175 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
9176   SDValue N0 = N->getOperand(0);
9177   EVT VT = N->getValueType(0);
9178 
9179   // fold (fp_to_sint c1fp) -> c1
9180   if (isConstantFPBuildVectorOrConstantFP(N0))
9181     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
9182 
9183   return FoldIntToFPToInt(N, DAG);
9184 }
9185 
9186 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
9187   SDValue N0 = N->getOperand(0);
9188   EVT VT = N->getValueType(0);
9189 
9190   // fold (fp_to_uint c1fp) -> c1
9191   if (isConstantFPBuildVectorOrConstantFP(N0))
9192     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
9193 
9194   return FoldIntToFPToInt(N, DAG);
9195 }
9196 
/// Visit an ISD::FP_ROUND node. Operand 1 is the "truncation" flag: 1 means
/// the round is known to be value-preserving. Folds constants, cancels with
/// a matching fp_extend, collapses double rounds, and hoists through copysign.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The merged round is value-preserving only if both inputs were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only the magnitude is valid since Y contributes just the sign.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}
9249 
9250 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
9251   SDValue N0 = N->getOperand(0);
9252   EVT VT = N->getValueType(0);
9253   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9254   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9255 
9256   // fold (fp_round_inreg c1fp) -> c1fp
9257   if (N0CFP && isTypeLegal(EVT)) {
9258     SDLoc DL(N);
9259     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
9260     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
9261   }
9262 
9263   return SDValue();
9264 }
9265 
/// Visit an ISD::FP_EXTEND node: cancel with fp_round/fp16_to_fp inputs,
/// constant-fold, and widen loads into extending loads.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    // Depending on how X's type compares to ours, return it directly,
    // re-round it (keeping the trunc flag), or extend it further.
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    // Replace N with the extending load, and replace the original load's
    // other uses with a value-preserving fp_round of it (trunc flag = 1),
    // chaining through the new load's chain result.
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}
9315 
9316 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
9317   SDValue N0 = N->getOperand(0);
9318   EVT VT = N->getValueType(0);
9319 
9320   // fold (fceil c1) -> fceil(c1)
9321   if (isConstantFPBuildVectorOrConstantFP(N0))
9322     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
9323 
9324   return SDValue();
9325 }
9326 
9327 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
9328   SDValue N0 = N->getOperand(0);
9329   EVT VT = N->getValueType(0);
9330 
9331   // fold (ftrunc c1) -> ftrunc(c1)
9332   if (isConstantFPBuildVectorOrConstantFP(N0))
9333     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
9334 
9335   return SDValue();
9336 }
9337 
9338 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
9339   SDValue N0 = N->getOperand(0);
9340   EVT VT = N->getValueType(0);
9341 
9342   // fold (ffloor c1) -> ffloor(c1)
9343   if (isConstantFPBuildVectorOrConstantFP(N0))
9344     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
9345 
9346   return SDValue();
9347 }
9348 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Visit an ISD::FNEG node: constant-fold, push the negation into operands
/// that can be negated for free, turn fneg(bitcast(x)) into an integer XOR
/// of the sign bit(s), and fold the negation into a constant FMUL operand.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can absorb the negation at no extra cost, do that instead
  // of emitting an explicit FNEG node.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      // XOR flips exactly the sign bit(s), which is FP negation bitwise.
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only profitable after legalization, and only when the negated
      // constant is cheap to materialize on the target.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                       N0.getOperand(1)),
                           &cast<BinaryWithFlagsSDNode>(N0)->Flags);
    }
  }

  return SDValue();
}
9407 
9408 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
9409   SDValue N0 = N->getOperand(0);
9410   SDValue N1 = N->getOperand(1);
9411   EVT VT = N->getValueType(0);
9412   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9413   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9414 
9415   if (N0CFP && N1CFP) {
9416     const APFloat &C0 = N0CFP->getValueAPF();
9417     const APFloat &C1 = N1CFP->getValueAPF();
9418     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
9419   }
9420 
9421   // Canonicalize to constant on RHS.
9422   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9423      !isConstantFPBuildVectorOrConstantFP(N1))
9424     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
9425 
9426   return SDValue();
9427 }
9428 
9429 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
9430   SDValue N0 = N->getOperand(0);
9431   SDValue N1 = N->getOperand(1);
9432   EVT VT = N->getValueType(0);
9433   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9434   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9435 
9436   if (N0CFP && N1CFP) {
9437     const APFloat &C0 = N0CFP->getValueAPF();
9438     const APFloat &C1 = N1CFP->getValueAPF();
9439     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
9440   }
9441 
9442   // Canonicalize to constant on RHS.
9443   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9444      !isConstantFPBuildVectorOrConstantFP(N1))
9445     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
9446 
9447   return SDValue();
9448 }
9449 
9450 SDValue DAGCombiner::visitFABS(SDNode *N) {
9451   SDValue N0 = N->getOperand(0);
9452   EVT VT = N->getValueType(0);
9453 
9454   // fold (fabs c1) -> fabs(c1)
9455   if (isConstantFPBuildVectorOrConstantFP(N0))
9456     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9457 
9458   // fold (fabs (fabs x)) -> (fabs x)
9459   if (N0.getOpcode() == ISD::FABS)
9460     return N->getOperand(0);
9461 
9462   // fold (fabs (fneg x)) -> (fabs x)
9463   // fold (fabs (fcopysign x, y)) -> (fabs x)
9464   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
9465     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
9466 
9467   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
9468   // constant pool values.
9469   if (!TLI.isFAbsFree(VT) &&
9470       N0.getOpcode() == ISD::BITCAST &&
9471       N0.getNode()->hasOneUse()) {
9472     SDValue Int = N0.getOperand(0);
9473     EVT IntVT = Int.getValueType();
9474     if (IntVT.isInteger() && !IntVT.isVector()) {
9475       APInt SignMask;
9476       if (N0.getValueType().isVector()) {
9477         // For a vector, get a mask such as 0x7f... per scalar element
9478         // and splat it.
9479         SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
9480         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
9481       } else {
9482         // For a scalar, just generate 0x7f...
9483         SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
9484       }
9485       SDLoc DL(N0);
9486       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
9487                         DAG.getConstant(SignMask, DL, IntVT));
9488       AddToWorklist(Int.getNode());
9489       return DAG.getBitcast(N->getValueType(0), Int);
9490     }
9491   }
9492 
9493   return SDValue();
9494 }
9495 
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);  // Branch condition.
  SDValue N2 = N->getOperand(2);  // Destination basic block.

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  // The condition is either a single-use SRL, or a single-use TRUNCATE of a
  // single-use SRL.
  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate; remember it so we can delete it on success.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        // The shift must extract exactly the single bit kept by the AND.
        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDLoc DL(N);
          SDValue SetCC =
            DAG.getSetCC(DL,
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      if (SDValue Tmp = visitXOR(TheXor)) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): this condition requires Op0 to be simultaneously the
      // constant 1 and an XOR node, which can never both hold, so Equal
      // appears to stay false and the br(xor(xor(x,y),1)) -> br(x == y)
      // transform never fires. Looks like a long-standing bug — confirm the
      // intended operand before changing it.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}
9640 
9641 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
9642 //
9643 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
9644   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
9645   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
9646 
9647   // If N is a constant we could fold this into a fallthrough or unconditional
9648   // branch. However that doesn't happen very often in normal code, because
9649   // Instcombine/SimplifyCFG should have handled the available opportunities.
9650   // If we did this folding here, it would be necessary to update the
9651   // MachineBasicBlock CFG, which is awkward.
9652 
9653   // Use SimplifySetCC to simplify SETCC's.
9654   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
9655                                CondLHS, CondRHS, CC->get(), SDLoc(N),
9656                                false);
9657   if (Simp.getNode()) AddToWorklist(Simp.getNode());
9658 
9659   // fold to a simpler setcc
9660   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
9661     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9662                        N->getOperand(0), Simp.getOperand(2),
9663                        Simp.getOperand(0), Simp.getOperand(1),
9664                        N->getOperand(4));
9665 
9666   return SDValue();
9667 }
9668 
9669 /// Return true if 'Use' is a load or a store that uses N as its base pointer
9670 /// and that N may be folded in the load / store addressing mode.
9671 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
9672                                     SelectionDAG &DAG,
9673                                     const TargetLowering &TLI) {
9674   EVT VT;
9675   unsigned AS;
9676 
9677   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
9678     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
9679       return false;
9680     VT = LD->getMemoryVT();
9681     AS = LD->getAddressSpace();
9682   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
9683     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
9684       return false;
9685     VT = ST->getMemoryVT();
9686     AS = ST->getAddressSpace();
9687   } else
9688     return false;
9689 
9690   TargetLowering::AddrMode AM;
9691   if (N->getOpcode() == ISD::ADD) {
9692     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9693     if (Offset)
9694       // [reg +/- imm]
9695       AM.BaseOffs = Offset->getSExtValue();
9696     else
9697       // [reg +/- reg]
9698       AM.Scale = 1;
9699   } else if (N->getOpcode() == ISD::SUB) {
9700     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9701     if (Offset)
9702       // [reg +/- imm]
9703       AM.BaseOffs = -Offset->getSExtValue();
9704     else
9705       // [reg +/- reg]
9706       AM.Scale = 1;
9707   } else
9708     return false;
9709 
9710   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
9711                                    VT.getTypeForEVT(*DAG.getContext()), AS);
9712 }
9713 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // Gather the memory VT and base pointer; bail out unless the target
  // supports some pre-indexed form for this memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // A use that is a predecessor of N cannot be rewritten to use the
      // indexed result without creating a cycle; just skip it.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Only constant add/sub users of the same offset type can be folded;
      // any other kind of user forces us to keep the original base pointer,
      // so rewriting the collected uses would be pointless.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // Op1 is the operand of the user that is NOT BasePtr.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    // Check #3: a use of Ptr that N depends on would become a cycle.
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // Check #4: a use that could already fold Ptr into its own addressing
    // mode doesn't count; if every other use can do so, the transformation
    // is not profitable.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // An indexed load produces (value, new base, chain); an indexed store
  // produces (new base, chain). Rewire the old node's results accordingly.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
9941 
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // Gather the memory VT and base pointer; bail out unless the target
  // supports some post-indexed form for this memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The increment must have another use besides this load/store, otherwise
  // there is no post-increment to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look for an add/sub of the base pointer that could become the index
  // update of a post-indexed form.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // An indexed load produces (value, new base, chain); an indexed store
        // produces (new base, chain). Rewire the old node's results.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
10061 
10062 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
10063 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
10064   ISD::MemIndexedMode AM = LD->getAddressingMode();
10065   assert(AM != ISD::UNINDEXED);
10066   SDValue BP = LD->getOperand(1);
10067   SDValue Inc = LD->getOperand(2);
10068 
10069   // Some backends use TargetConstants for load offsets, but don't expect
10070   // TargetConstants in general ADD nodes. We can convert these constants into
10071   // regular Constants (if the constant is not opaque).
10072   assert((Inc.getOpcode() != ISD::TargetConstant ||
10073           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
10074          "Cannot split out indexing using opaque target constants");
10075   if (Inc.getOpcode() == ISD::TargetConstant) {
10076     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
10077     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
10078                           ConstInc->getValueType(0));
10079   }
10080 
10081   unsigned Opc =
10082       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
10083   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
10084 }
10085 
10086 SDValue DAGCombiner::visitLOAD(SDNode *N) {
10087   LoadSDNode *LD  = cast<LoadSDNode>(N);
10088   SDValue Chain = LD->getChain();
10089   SDValue Ptr   = LD->getBasePtr();
10090 
10091   // If load is not volatile and there are no uses of the loaded value (and
10092   // the updated indexed value in case of indexed loads), change uses of the
10093   // chain value into uses of the chain input (i.e. delete the dead load).
10094   if (!LD->isVolatile()) {
10095     if (N->getValueType(1) == MVT::Other) {
10096       // Unindexed loads.
10097       if (!N->hasAnyUseOfValue(0)) {
10098         // It's not safe to use the two value CombineTo variant here. e.g.
10099         // v1, chain2 = load chain1, loc
10100         // v2, chain3 = load chain2, loc
10101         // v3         = add v2, c
10102         // Now we replace use of chain2 with chain1.  This makes the second load
10103         // isomorphic to the one we are deleting, and thus makes this load live.
10104         DEBUG(dbgs() << "\nReplacing.6 ";
10105               N->dump(&DAG);
10106               dbgs() << "\nWith chain: ";
10107               Chain.getNode()->dump(&DAG);
10108               dbgs() << "\n");
10109         WorklistRemover DeadNodes(*this);
10110         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10111 
10112         if (N->use_empty())
10113           deleteAndRecombine(N);
10114 
10115         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10116       }
10117     } else {
10118       // Indexed loads.
10119       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
10120 
10121       // If this load has an opaque TargetConstant offset, then we cannot split
10122       // the indexing into an add/sub directly (that TargetConstant may not be
10123       // valid for a different type of node, and we cannot convert an opaque
10124       // target constant into a regular constant).
10125       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
10126                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
10127 
10128       if (!N->hasAnyUseOfValue(0) &&
10129           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
10130         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
10131         SDValue Index;
10132         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
10133           Index = SplitIndexingFromLoad(LD);
10134           // Try to fold the base pointer arithmetic into subsequent loads and
10135           // stores.
10136           AddUsersToWorklist(N);
10137         } else
10138           Index = DAG.getUNDEF(N->getValueType(1));
10139         DEBUG(dbgs() << "\nReplacing.7 ";
10140               N->dump(&DAG);
10141               dbgs() << "\nWith: ";
10142               Undef.getNode()->dump(&DAG);
10143               dbgs() << " and 2 other values\n");
10144         WorklistRemover DeadNodes(*this);
10145         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
10146         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
10147         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
10148         deleteAndRecombine(N);
10149         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10150       }
10151     }
10152   }
10153 
10154   // If this load is directly stored, replace the load value with the stored
10155   // value.
10156   // TODO: Handle store large -> read small portion.
10157   // TODO: Handle TRUNCSTORE/LOADEXT
10158   if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
10159     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
10160       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
10161       if (PrevST->getBasePtr() == Ptr &&
10162           PrevST->getValue().getValueType() == N->getValueType(0))
10163       return CombineTo(N, Chain.getOperand(1), Chain);
10164     }
10165   }
10166 
10167   // Try to infer better alignment information than the load already has.
10168   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
10169     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
10170       if (Align > LD->getMemOperand()->getBaseAlignment()) {
10171         SDValue NewLoad = DAG.getExtLoad(
10172             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
10173             LD->getPointerInfo(), LD->getMemoryVT(), Align,
10174             LD->getMemOperand()->getFlags(), LD->getAAInfo());
10175         if (NewLoad.getNode() != N)
10176           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
10177       }
10178     }
10179   }
10180 
10181   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
10182                                                   : DAG.getSubtarget().useAA();
10183 #ifndef NDEBUG
10184   if (CombinerAAOnlyFunc.getNumOccurrences() &&
10185       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
10186     UseAA = false;
10187 #endif
10188   if (UseAA && LD->isUnindexed()) {
10189     // Walk up chain skipping non-aliasing memory nodes.
10190     SDValue BetterChain = FindBetterChain(N, Chain);
10191 
10192     // If there is a better chain.
10193     if (Chain != BetterChain) {
10194       SDValue ReplLoad;
10195 
10196       // Replace the chain to void dependency.
10197       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
10198         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
10199                                BetterChain, Ptr, LD->getMemOperand());
10200       } else {
10201         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
10202                                   LD->getValueType(0),
10203                                   BetterChain, Ptr, LD->getMemoryVT(),
10204                                   LD->getMemOperand());
10205       }
10206 
10207       // Create token factor to keep old chain connected.
10208       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
10209                                   MVT::Other, Chain, ReplLoad.getValue(1));
10210 
10211       // Make sure the new and old chains are cleaned up.
10212       AddToWorklist(Token.getNode());
10213 
10214       // Replace uses with load result and token factor. Don't add users
10215       // to work list.
10216       return CombineTo(N, ReplLoad.getValue(0), Token, false);
10217     }
10218   }
10219 
10220   // Try transforming N to an indexed load.
10221   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10222     return SDValue(N, 0);
10223 
10224   // Try to slice up N to more direct loads if the slices are mapped to
10225   // different register banks or pairing can take place.
10226   if (SliceUpLoad(N))
10227     return SDValue(N, 0);
10228 
10229   return SDValue();
10230 }
10231 
namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost.
    /// Number of loads issued by the sliced configuration.
    unsigned Loads;
    /// Number of truncate operations.
    unsigned Truncates;
    /// Number of copies that cross register banks (e.g., int -> float).
    unsigned CrossRegisterBanksCopies;
    /// Number of zero-extensions needed to widen slices back up.
    unsigned ZExts;
    /// Number of shift operations.
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    /// A slice always costs one load, plus a zero-extension when the
    /// loaded type must be widened to the truncate's result type and the
    /// target does not consider that extension free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      // For code size (or as a tie break), compare the total number of
      // operations, weighing all kinds equally.
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    // Start with an all-ones mask as wide as the truncate's result ...
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    // ... widen it with zeros to the original load's width ...
    UsedBits = UsedBits.zext(BitWidth);
    // ... and shift it back into position to undo the right shift.
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // The base alignment only holds at offset 0. For a non-zero offset,
    // MinAlign keeps the largest power of two that divides both the base
    // alignment and the offset-adjusted address.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the least significant bits live at the end of
    // the in-memory representation, so mirror the offset.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    // Only a slice with a single bitcast user can fold the copy.
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}
10535 
10536 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
10537 /// \p UsedBits looks like 0..0 1..1 0..0.
10538 static bool areUsedBitsDense(const APInt &UsedBits) {
10539   // If all the bits are one, this is dense!
10540   if (UsedBits.isAllOnesValue())
10541     return true;
10542 
10543   // Get rid of the unused bits on the right.
10544   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
10545   // Get rid of the unused bits on the left.
10546   if (NarrowedUsedBits.countLeadingZeros())
10547     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
10548   // Check that the chunk of bits is completely used.
10549   return NarrowedUsedBits.isAllOnesValue();
10550 }
10551 
10552 /// \brief Check whether or not \p First and \p Second are next to each other
10553 /// in memory. This means that there is no hole between the bits loaded
10554 /// by \p First and the bits loaded by \p Second.
10555 static bool areSlicesNextToEachOther(const LoadedSlice &First,
10556                                      const LoadedSlice &Second) {
10557   assert(First.Origin == Second.Origin && First.Origin &&
10558          "Unable to match different memory origins.");
10559   APInt UsedBits = First.getUsedBits();
10560   assert((UsedBits & Second.getUsedBits()) == 0 &&
10561          "Slices are not supposed to overlap.");
10562   UsedBits |= Second.getUsedBits();
10563   return areUsedBitsDense(UsedBits);
10564 }
10565 
10566 /// \brief Adjust the \p GlobalLSCost according to the target
10567 /// paring capabilities and the layout of the slices.
10568 /// \pre \p GlobalLSCost should account for at least as many loads as
10569 /// there is in the slices in \p LoadedSlices.
10570 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10571                                  LoadedSlice::Cost &GlobalLSCost) {
10572   unsigned NumberOfSlices = LoadedSlices.size();
10573   // If there is less than 2 elements, no pairing is possible.
10574   if (NumberOfSlices < 2)
10575     return;
10576 
10577   // Sort the slices so that elements that are likely to be next to each
10578   // other in memory are next to each other in the list.
10579   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
10580             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
10581     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
10582     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
10583   });
10584   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
10585   // First (resp. Second) is the first (resp. Second) potentially candidate
10586   // to be placed in a paired load.
10587   const LoadedSlice *First = nullptr;
10588   const LoadedSlice *Second = nullptr;
10589   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
10590                 // Set the beginning of the pair.
10591                                                            First = Second) {
10592 
10593     Second = &LoadedSlices[CurrSlice];
10594 
10595     // If First is NULL, it means we start a new pair.
10596     // Get to the next slice.
10597     if (!First)
10598       continue;
10599 
10600     EVT LoadedType = First->getLoadedType();
10601 
10602     // If the types of the slices are different, we cannot pair them.
10603     if (LoadedType != Second->getLoadedType())
10604       continue;
10605 
10606     // Check if the target supplies paired loads for this type.
10607     unsigned RequiredAlignment = 0;
10608     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
10609       // move to the next pair, this type is hopeless.
10610       Second = nullptr;
10611       continue;
10612     }
10613     // Check if we meet the alignment requirement.
10614     if (RequiredAlignment > First->getAlignment())
10615       continue;
10616 
10617     // Check that both loads are next to each other in memory.
10618     if (!areSlicesNextToEachOther(*First, *Second))
10619       continue;
10620 
10621     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
10622     --GlobalLSCost.Loads;
10623     // Move to the next pair.
10624     Second = nullptr;
10625   }
10626 }
10627 
10628 /// \brief Check the profitability of all involved LoadedSlice.
10629 /// Currently, it is considered profitable if there is exactly two
10630 /// involved slices (1) which are (2) next to each other in memory, and
10631 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
10632 ///
10633 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
10634 /// the elements themselves.
10635 ///
10636 /// FIXME: When the cost model will be mature enough, we can relax
10637 /// constraints (1) and (2).
10638 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10639                                 const APInt &UsedBits, bool ForCodeSize) {
10640   unsigned NumberOfSlices = LoadedSlices.size();
10641   if (StressLoadSlicing)
10642     return NumberOfSlices > 1;
10643 
10644   // Check (1).
10645   if (NumberOfSlices != 2)
10646     return false;
10647 
10648   // Check (2).
10649   if (!areUsedBitsDense(UsedBits))
10650     return false;
10651 
10652   // Check (3).
10653   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
10654   // The original code has one big load.
10655   OrigCost.Loads = 1;
10656   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
10657     const LoadedSlice &LS = LoadedSlices[CurrSlice];
10658     // Accumulate the cost of all the slices.
10659     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
10660     GlobalSlicingCost += SliceCost;
10661 
10662     // Account as cost in the original configuration the gain obtained
10663     // with the current slices.
10664     OrigCost.addSliceGain(LS);
10665   }
10666 
10667   // If the target supports paired load, adjust the cost accordingly.
10668   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
10669   return OrigCost > GlobalSlicingCost;
10670 }
10671 
10672 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
10673 /// operations, split it in the various pieces being extracted.
10674 ///
10675 /// This sort of thing is introduced by SROA.
10676 /// This slicing takes care not to insert overlapping loads.
10677 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
10678 bool DAGCombiner::SliceUpLoad(SDNode *N) {
10679   if (Level < AfterLegalizeDAG)
10680     return false;
10681 
10682   LoadSDNode *LD = cast<LoadSDNode>(N);
10683   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
10684       !LD->getValueType(0).isInteger())
10685     return false;
10686 
10687   // Keep track of already used bits to detect overlapping values.
10688   // In that case, we will just abort the transformation.
10689   APInt UsedBits(LD->getValueSizeInBits(0), 0);
10690 
10691   SmallVector<LoadedSlice, 4> LoadedSlices;
10692 
10693   // Check if this load is used as several smaller chunks of bits.
10694   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
10695   // of computation for each trunc.
10696   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
10697        UI != UIEnd; ++UI) {
10698     // Skip the uses of the chain.
10699     if (UI.getUse().getResNo() != 0)
10700       continue;
10701 
10702     SDNode *User = *UI;
10703     unsigned Shift = 0;
10704 
10705     // Check if this is a trunc(lshr).
10706     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
10707         isa<ConstantSDNode>(User->getOperand(1))) {
10708       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
10709       User = *User->use_begin();
10710     }
10711 
10712     // At this point, User is a Truncate, iff we encountered, trunc or
10713     // trunc(lshr).
10714     if (User->getOpcode() != ISD::TRUNCATE)
10715       return false;
10716 
10717     // The width of the type must be a power of 2 and greater than 8-bits.
10718     // Otherwise the load cannot be represented in LLVM IR.
10719     // Moreover, if we shifted with a non-8-bits multiple, the slice
10720     // will be across several bytes. We do not support that.
10721     unsigned Width = User->getValueSizeInBits(0);
10722     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
10723       return 0;
10724 
10725     // Build the slice for this chain of computations.
10726     LoadedSlice LS(User, LD, Shift, &DAG);
10727     APInt CurrentUsedBits = LS.getUsedBits();
10728 
10729     // Check if this slice overlaps with another.
10730     if ((CurrentUsedBits & UsedBits) != 0)
10731       return false;
10732     // Update the bits used globally.
10733     UsedBits |= CurrentUsedBits;
10734 
10735     // Check if the new slice would be legal.
10736     if (!LS.isLegal())
10737       return false;
10738 
10739     // Record the slice.
10740     LoadedSlices.push_back(LS);
10741   }
10742 
10743   // Abort slicing if it does not seem to be profitable.
10744   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
10745     return false;
10746 
10747   ++SlicedLoads;
10748 
10749   // Rewrite each chain to use an independent load.
10750   // By construction, each chain can be represented by a unique load.
10751 
10752   // Prepare the argument for the new token factor for all the slices.
10753   SmallVector<SDValue, 8> ArgChains;
10754   for (SmallVectorImpl<LoadedSlice>::const_iterator
10755            LSIt = LoadedSlices.begin(),
10756            LSItEnd = LoadedSlices.end();
10757        LSIt != LSItEnd; ++LSIt) {
10758     SDValue SliceInst = LSIt->loadSlice();
10759     CombineTo(LSIt->Inst, SliceInst, true);
10760     if (SliceInst.getOpcode() != ISD::LOAD)
10761       SliceInst = SliceInst.getOperand(0);
10762     assert(SliceInst->getOpcode() == ISD::LOAD &&
10763            "It takes more than a zext to get to the loaded slice!!");
10764     ArgChains.push_back(SliceInst.getValue(1));
10765   }
10766 
10767   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
10768                               ArgChains);
10769   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10770   return true;
10771 }
10772 
10773 /// Check to see if V is (and load (ptr), imm), where the load is having
10774 /// specific bytes cleared out.  If so, return the byte size being masked out
10775 /// and the shift amount.
10776 static std::pair<unsigned, unsigned>
10777 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
10778   std::pair<unsigned, unsigned> Result(0, 0);
10779 
10780   // Check for the structure we're looking for.
10781   if (V->getOpcode() != ISD::AND ||
10782       !isa<ConstantSDNode>(V->getOperand(1)) ||
10783       !ISD::isNormalLoad(V->getOperand(0).getNode()))
10784     return Result;
10785 
10786   // Check the chain and pointer.
10787   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
10788   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
10789 
10790   // The store should be chained directly to the load or be an operand of a
10791   // tokenfactor.
10792   if (LD == Chain.getNode())
10793     ; // ok.
10794   else if (Chain->getOpcode() != ISD::TokenFactor)
10795     return Result; // Fail.
10796   else {
10797     bool isOk = false;
10798     for (const SDValue &ChainOp : Chain->op_values())
10799       if (ChainOp.getNode() == LD) {
10800         isOk = true;
10801         break;
10802       }
10803     if (!isOk) return Result;
10804   }
10805 
10806   // This only handles simple types.
10807   if (V.getValueType() != MVT::i16 &&
10808       V.getValueType() != MVT::i32 &&
10809       V.getValueType() != MVT::i64)
10810     return Result;
10811 
10812   // Check the constant mask.  Invert it so that the bits being masked out are
10813   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
10814   // follow the sign bit for uniformity.
10815   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
10816   unsigned NotMaskLZ = countLeadingZeros(NotMask);
10817   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
10818   unsigned NotMaskTZ = countTrailingZeros(NotMask);
10819   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
10820   if (NotMaskLZ == 64) return Result;  // All zero mask.
10821 
10822   // See if we have a continuous run of bits.  If so, we have 0*1+0*
10823   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
10824     return Result;
10825 
10826   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
10827   if (V.getValueType() != MVT::i64 && NotMaskLZ)
10828     NotMaskLZ -= 64-V.getValueSizeInBits();
10829 
10830   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
10831   switch (MaskedBytes) {
10832   case 1:
10833   case 2:
10834   case 4: break;
10835   default: return Result; // All one mask, or 5-byte mask.
10836   }
10837 
10838   // Verify that the first bit starts at a multiple of mask so that the access
10839   // is aligned the same as the access width.
10840   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
10841 
10842   Result.first = MaskedBytes;
10843   Result.second = NotMaskTZ/8;
10844   return Result;
10845 }
10846 
10847 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
///
/// \param MaskInfo pair of (number of masked bytes, byte shift) as produced
///        by CheckForMaskedLoad.
/// \param IVal the value providing the bytes to be stored.
/// \param St the wide store to be replaced.
/// \param DC the combiner, used for DAG access and legality queries.
/// \return the narrow store node, or nullptr if the transform does not apply.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian the masked bytes sit ByteShift bytes from the base;
  // on big-endian they sit at the mirrored position from the end.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    // The original alignment only holds at offset 0; shrink it accordingly.
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}
10907 
10908 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // Only handle (op (load P), cst) feeding this store of P, where the store's
  // chain is the load's output chain (so nothing intervenes between them).
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, complement the constant so that Imm's set bits are exactly the
    // bits the operation changes, matching the OR/XOR convention.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // If no bits change (or all bits change) there is nothing to narrow to.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // [ShAmt, MSB] is the bit range actually modified by the operation.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // Mask is the NewBW-bit window we would rewrite; the transform is only
    // valid if every changed bit lies inside that window.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      // Narrow the constant to the window, undoing the AND complement.
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Bail out if the narrowed access would be under-aligned for NewVT.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store at the adjusted pointer.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect users of the old load's chain result to the new load's chain
      // so the wide load becomes dead and can be removed.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
11034 
11035 /// For a given floating point load / store pair, if the load value isn't used
11036 /// by any other operations, then consider transforming the pair to integer
11037 /// load / store operations if the target deems the transformation profitable.
11038 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
11039   StoreSDNode *ST  = cast<StoreSDNode>(N);
11040   SDValue Chain = ST->getChain();
11041   SDValue Value = ST->getValue();
11042   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
11043       Value.hasOneUse() &&
11044       Chain == SDValue(Value.getNode(), 1)) {
11045     LoadSDNode *LD = cast<LoadSDNode>(Value);
11046     EVT VT = LD->getMemoryVT();
11047     if (!VT.isFloatingPoint() ||
11048         VT != ST->getMemoryVT() ||
11049         LD->isNonTemporal() ||
11050         ST->isNonTemporal() ||
11051         LD->getPointerInfo().getAddrSpace() != 0 ||
11052         ST->getPointerInfo().getAddrSpace() != 0)
11053       return SDValue();
11054 
11055     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11056     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
11057         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
11058         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
11059         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
11060       return SDValue();
11061 
11062     unsigned LDAlign = LD->getAlignment();
11063     unsigned STAlign = ST->getAlignment();
11064     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
11065     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
11066     if (LDAlign < ABIAlign || STAlign < ABIAlign)
11067       return SDValue();
11068 
11069     SDValue NewLD =
11070         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
11071                     LD->getPointerInfo(), LDAlign);
11072 
11073     SDValue NewST =
11074         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
11075                      ST->getPointerInfo(), STAlign);
11076 
11077     AddToWorklist(NewLD.getNode());
11078     AddToWorklist(NewST.getNode());
11079     WorklistRemover DeadNodes(*this);
11080     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
11081     ++LdStFP2Int;
11082     return NewST;
11083   }
11084 
11085   return SDValue();
11086 }
11087 
namespace {
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
///  (load (i64 add (i64 copyfromreg %c)
///                 (i64 signextend (add (i8 load %index)
///                                      (i8 1))))
/// vs
///
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                         (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;         ///< Base pointer operand of the address.
  SDValue Index;        ///< Optional variable index (may be a null SDValue).
  int64_t Offset;       ///< Constant byte offset from Base (+ Index).
  bool IsIndexSignExt;  ///< True if Index was reached through a sign extend.

  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}

  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}

  /// Return true if \p Other addresses the same object as this, i.e. base,
  /// index, and index sign-extension all match (the constant offsets may
  /// differ).
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
      Other.IsIndexSignExt == IsIndexSignExt;
  }

  /// Parses tree in Ptr for base, index, offset addresses.
  static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
    bool IsIndexSignExt = false;

    // Split up a folded GlobalAddress+Offset into its component parts.
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
      if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
        return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
                                                    SDLoc(GA),
                                                    GA->getValueType(0),
                                                    /*Offset=*/0,
                                                    /*isTargetGA=*/false,
                                                    GA->getTargetFlags()),
                               SDValue(),
                               GA->getOffset(),
                               IsIndexSignExt);
      }

    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                              IsIndexSignExt);
    }

    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);

    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }

    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);

    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);

    // The inner offset must be a constant; otherwise give up and treat the
    // whole Ptr as an opaque base.
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;

    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};
} // namespace
11191 
11192 // This is a helper function for visitMUL to check the profitability
11193 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
11194 // MulNode is the original multiply, AddNode is (add x, c1),
11195 // and ConstNode is c2.
11196 //
11197 // If the (add x, c1) has multiple uses, we could increase
11198 // the number of adds if we make this transformation.
11199 // It would only be worth doing this if we can remove a
11200 // multiply in the process. Check for that here.
11201 // To illustrate:
11202 //     (A + c1) * c3
11203 //     (A + c2) * c3
11204 // We're checking for cases where we have common "c3 * A" expressions.
11205 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
11206                                               SDValue &AddNode,
11207                                               SDValue &ConstNode) {
11208   APInt Val;
11209 
11210   // If the add only has one use, this would be OK to do.
11211   if (AddNode.getNode()->hasOneUse())
11212     return true;
11213 
11214   // Walk all the users of the constant with which we're multiplying.
11215   for (SDNode *Use : ConstNode->uses()) {
11216 
11217     if (Use == MulNode) // This use is the one we're on right now. Skip it.
11218       continue;
11219 
11220     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
11221       SDNode *OtherOp;
11222       SDNode *MulVar = AddNode.getOperand(0).getNode();
11223 
11224       // OtherOp is what we're multiplying against the constant.
11225       if (Use->getOperand(0) == ConstNode)
11226         OtherOp = Use->getOperand(1).getNode();
11227       else
11228         OtherOp = Use->getOperand(0).getNode();
11229 
11230       // Check to see if multiply is with the same operand of our "add".
11231       //
11232       //     ConstNode  = CONST
11233       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
11234       //     ...
11235       //     AddNode  = (A + c1)  <-- MulVar is A.
11236       //         = AddNode * ConstNode   <-- current visiting instruction.
11237       //
11238       // If we make this transformation, we will have a common
11239       // multiply (ConstNode * A) that we can save.
11240       if (OtherOp == MulVar)
11241         return true;
11242 
11243       // Now check to see if a future expansion will give us a common
11244       // multiply.
11245       //
11246       //     ConstNode  = CONST
11247       //     AddNode    = (A + c1)
11248       //     ...   = AddNode * ConstNode <-- current visiting instruction.
11249       //     ...
11250       //     OtherOp = (A + c2)
11251       //     Use     = OtherOp * ConstNode <-- visiting Use.
11252       //
11253       // If we make this transformation, we will have a common
11254       // multiply (CONST * A) after we also do the same transformation
11255       // to the "t2" instruction.
11256       if (OtherOp->getOpcode() == ISD::ADD &&
11257           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
11258           OtherOp->getOperand(0).getNode() == MulVar)
11259         return true;
11260     }
11261   }
11262 
11263   // Didn't find a case where this would be profitable.
11264   return false;
11265 }
11266 
11267 SDValue DAGCombiner::getMergedConstantVectorStore(
11268     SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
11269     SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
11270   SmallVector<SDValue, 8> BuildVector;
11271 
11272   for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
11273     StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
11274     Chains.push_back(St->getChain());
11275     BuildVector.push_back(St->getValue());
11276   }
11277 
11278   return DAG.getBuildVector(Ty, SL, BuildVector);
11279 }
11280 
/// Merge NumStores consecutive stores (described by StoreNodes, each of
/// memory type MemVT) into one wide store.  IsConstantSrc means the stored
/// values are constants; UseVector selects a vector-typed merged store,
/// otherwise the constants are packed into a single wide integer.  Returns
/// true if the merge was performed.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
                  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
                  unsigned NumStores, bool IsConstantSrc, bool UseVector) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned LatestNodeUsed = 0;

  for (unsigned i=0; i < NumStores; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    // (The minimum SequenceNum is taken as the latest node; see how
    // SequenceNum is assigned when candidates are collected.)
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }

  SmallVector<SDValue, 8> Chains;

  // The latest Node in the DAG.
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  SDLoc DL(StoreNodes[0].MemNode);

  SDValue StoredVal;
  if (UseVector) {
    bool IsVec = MemVT.isVector();
    unsigned Elts = NumStores;
    if (IsVec) {
      // When merging vector stores, get the total number of elements.
      Elts *= MemVT.getVectorNumElements();
    }
    // Get the type for the merged vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");

    if (IsConstantSrc) {
      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
        Chains.push_back(St->getChain());
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
                              DL, Ty, Ops);    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets visit the stores in reverse order so that,
      // after all the shifts, the lowest-addressed store ends up in the least
      // significant bits of the packed integer.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
      Chains.push_back(St->getChain());

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zext(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  assert(!Chains.empty());

  // The wide store's chain is a TokenFactor of all the merged stores' chains,
  // and it reuses the first store's address and alignment.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
                                  FirstInChain->getBasePtr(),
                                  FirstInChain->getPointerInfo(),
                                  FirstInChain->getAlignment());

  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
  if (UseAA) {
    // Replace all merged stores with the new store.
    for (unsigned i = 0; i < NumStores; ++i)
      CombineTo(StoreNodes[i].MemNode, NewStore);
  } else {
    // Replace the last store with the new store.
    CombineTo(LatestOp, NewStore);
    // Erase all other stores.
    for (unsigned i = 0; i < NumStores; ++i) {
      if (StoreNodes[i].MemNode == LatestOp)
        continue;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      // ReplaceAllUsesWith will replace all uses that existed when it was
      // called, but graph optimizations may cause new ones to appear. For
      // example, the case in pr14333 looks like
      //
      //  St's chain -> St -> another store -> X
      //
      // And the only difference from St to the other store is the chain.
      // When we change it's chain to be St's chain they become identical,
      // get CSEed and the net result is that X is now a use of St.
      // Since we know that St is redundant, just iterate.
      while (!St->use_empty())
        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
      deleteAndRecombine(St);
    }
  }

  return true;
}
11408 
/// Collect stores that are candidates for merging with St into StoreNodes,
/// and record in AliasLoadNodes any loads found on the chain that could
/// alias them.  Candidates must match St's memory VT and decompose to the
/// same base/index (via BaseIndexOffset); each MemOpLink carries the store's
/// constant offset from that base plus a sequence number used for ordering.
void DAGCombiner::getStoreMergeAndAliasCandidates(
    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);

  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.isUndef())
    return;

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  EVT MemVT = St->getMemoryVT();
  unsigned Seq = 0;
  StoreSDNode *Index = St;


  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();

  if (UseAA) {
    // Look at other users of the same chain. Stores on the same chain do not
    // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
    // to be on the same chain, so don't bother looking at adjacent chains.

    SDValue Chain = St->getChain();
    for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
      if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
        // Only consider uses where Chain is the chain operand (operand 0),
        // not e.g. the stored value.
        if (I.getOperandNo() != 0)
          continue;

        if (OtherST->isVolatile() || OtherST->isIndexed())
          continue;

        if (OtherST->getMemoryVT() != MemVT)
          continue;

        BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);

        if (Ptr.equalBaseIndex(BasePtr))
          StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
      }
    }

    return;
  }

  // Non-AA path: walk up the chain from St, collecting matching stores until
  // something stops the scan.
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);

    // Check that the base pointer is the same as the original one.
    if (!Ptr.equalBaseIndex(BasePtr))
      break;

    // The memory operands must not be volatile.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // No truncation.
    if (Index->isTruncatingStore())
      break;

    // The stored memory type must be the same.
    if (Index->getMemoryVT() != MemVT)
      break;

    // We do not allow under-aligned stores in order to prevent
    // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
    // be irrelevant here; what MATTERS is that we not move memory
    // operations that potentially overlap past each-other.
    if (Index->getAlignment() < MemVT.getStoreSize())
      break;

    // We found a potential memory operand to merge.
    StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));

    // Find the next memory operand in the chain. If the next operand in the
    // chain is a store then move up and continue the scan with the next
    // memory operand. If the next operand is a load save it and use alias
    // information to check if it interferes with anything.
    SDNode *NextInChain = Index->getChain().getNode();
    while (1) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        if (Ldn->isVolatile()) {
          // A volatile load terminates the scan entirely.
          Index = nullptr;
          break;
        }

        // Save the load node for later. Continue the scan.
        AliasLoadNodes.push_back(Ldn);
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Any other kind of node ends the walk up the chain.
        Index = nullptr;
        break;
      }
    }
  }
}
11523 
11524 // We need to check that merging these stores does not cause a loop
11525 // in the DAG. Any store candidate may depend on another candidate
11526 // indirectly through its operand (we already consider dependencies
11527 // through the chain). Check in parallel by searching up from
11528 // non-chain operands of candidates.
11529 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
11530     SmallVectorImpl<MemOpLink> &StoreNodes) {
11531   SmallPtrSet<const SDNode *, 16> Visited;
11532   SmallVector<const SDNode *, 8> Worklist;
11533   // search ops of store candidates
11534   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11535     SDNode *n = StoreNodes[i].MemNode;
11536     // Potential loops may happen only through non-chain operands
11537     for (unsigned j = 1; j < n->getNumOperands(); ++j)
11538       Worklist.push_back(n->getOperand(j).getNode());
11539   }
11540   // search through DAG. We can stop early if we find a storenode
11541   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11542     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
11543       return false;
11544   }
11545   return true;
11546 }
11547 
11548 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
11549   if (OptLevel == CodeGenOpt::None)
11550     return false;
11551 
11552   EVT MemVT = St->getMemoryVT();
11553   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11554   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
11555       Attribute::NoImplicitFloat);
11556 
11557   // This function cannot currently deal with non-byte-sized memory sizes.
11558   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
11559     return false;
11560 
11561   if (!MemVT.isSimple())
11562     return false;
11563 
11564   // Perform an early exit check. Do not bother looking at stored values that
11565   // are not constants, loads, or extracted vector elements.
11566   SDValue StoredVal = St->getValue();
11567   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
11568   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
11569                        isa<ConstantFPSDNode>(StoredVal);
11570   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
11571                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
11572 
11573   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
11574     return false;
11575 
11576   // Don't merge vectors into wider vectors if the source data comes from loads.
11577   // TODO: This restriction can be lifted by using logic similar to the
11578   // ExtractVecSrc case.
11579   if (MemVT.isVector() && IsLoadSrc)
11580     return false;
11581 
11582   // Only look at ends of store sequences.
11583   SDValue Chain = SDValue(St, 0);
11584   if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
11585     return false;
11586 
11587   // Save the LoadSDNodes that we find in the chain.
11588   // We need to make sure that these nodes do not interfere with
11589   // any of the store nodes.
11590   SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
11591 
11592   // Save the StoreSDNodes that we find in the chain.
11593   SmallVector<MemOpLink, 8> StoreNodes;
11594 
11595   getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
11596 
11597   // Check if there is anything to merge.
11598   if (StoreNodes.size() < 2)
11599     return false;
11600 
11601   // only do dependence check in AA case
11602   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11603                                                   : DAG.getSubtarget().useAA();
11604   if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
11605     return false;
11606 
11607   // Sort the memory operands according to their distance from the
11608   // base pointer.  As a secondary criteria: make sure stores coming
11609   // later in the code come first in the list. This is important for
11610   // the non-UseAA case, because we're merging stores into the FINAL
11611   // store along a chain which potentially contains aliasing stores.
11612   // Thus, if there are multiple stores to the same address, the last
11613   // one can be considered for merging but not the others.
11614   std::sort(StoreNodes.begin(), StoreNodes.end(),
11615             [](MemOpLink LHS, MemOpLink RHS) {
11616     return LHS.OffsetFromBase < RHS.OffsetFromBase ||
11617            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
11618             LHS.SequenceNum < RHS.SequenceNum);
11619   });
11620 
11621   // Scan the memory operations on the chain and find the first non-consecutive
11622   // store memory address.
11623   unsigned LastConsecutiveStore = 0;
11624   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
11625   for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
11626 
11627     // Check that the addresses are consecutive starting from the second
11628     // element in the list of stores.
11629     if (i > 0) {
11630       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
11631       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11632         break;
11633     }
11634 
11635     // Check if this store interferes with any of the loads that we found.
11636     // If we find a load that alias with this store. Stop the sequence.
11637     if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
11638           return isAlias(Ldn, StoreNodes[i].MemNode);
11639         }))
11640       break;
11641 
11642     // Mark this node as useful.
11643     LastConsecutiveStore = i;
11644   }
11645 
11646   // The node with the lowest store address.
11647   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11648   unsigned FirstStoreAS = FirstInChain->getAddressSpace();
11649   unsigned FirstStoreAlign = FirstInChain->getAlignment();
11650   LLVMContext &Context = *DAG.getContext();
11651   const DataLayout &DL = DAG.getDataLayout();
11652 
11653   // Store the constants into memory as one consecutive store.
11654   if (IsConstantSrc) {
11655     unsigned LastLegalType = 0;
11656     unsigned LastLegalVectorType = 0;
11657     bool NonZero = false;
11658     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11659       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11660       SDValue StoredVal = St->getValue();
11661 
11662       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
11663         NonZero |= !C->isNullValue();
11664       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
11665         NonZero |= !C->getConstantFPValue()->isNullValue();
11666       } else {
11667         // Non-constant.
11668         break;
11669       }
11670 
11671       // Find a legal type for the constant store.
11672       unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11673       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11674       bool IsFast;
11675       if (TLI.isTypeLegal(StoreTy) &&
11676           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11677                                  FirstStoreAlign, &IsFast) && IsFast) {
11678         LastLegalType = i+1;
11679       // Or check whether a truncstore is legal.
11680       } else if (TLI.getTypeAction(Context, StoreTy) ==
11681                  TargetLowering::TypePromoteInteger) {
11682         EVT LegalizedStoredValueTy =
11683           TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
11684         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11685             TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11686                                    FirstStoreAS, FirstStoreAlign, &IsFast) &&
11687             IsFast) {
11688           LastLegalType = i + 1;
11689         }
11690       }
11691 
11692       // We only use vectors if the constant is known to be zero or the target
11693       // allows it and the function is not marked with the noimplicitfloat
11694       // attribute.
11695       if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
11696                                                         FirstStoreAS)) &&
11697           !NoVectors) {
11698         // Find a legal type for the vector store.
11699         EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
11700         if (TLI.isTypeLegal(Ty) &&
11701             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11702                                    FirstStoreAlign, &IsFast) && IsFast)
11703           LastLegalVectorType = i + 1;
11704       }
11705     }
11706 
11707     // Check if we found a legal integer type to store.
11708     if (LastLegalType == 0 && LastLegalVectorType == 0)
11709       return false;
11710 
11711     bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
11712     unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
11713 
11714     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
11715                                            true, UseVector);
11716   }
11717 
11718   // When extracting multiple vector elements, try to store them
11719   // in one vector store rather than a sequence of scalar stores.
11720   if (IsExtractVecSrc) {
11721     unsigned NumStoresToMerge = 0;
11722     bool IsVec = MemVT.isVector();
11723     for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
11724       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11725       unsigned StoreValOpcode = St->getValue().getOpcode();
11726       // This restriction could be loosened.
11727       // Bail out if any stored values are not elements extracted from a vector.
11728       // It should be possible to handle mixed sources, but load sources need
11729       // more careful handling (see the block of code below that handles
11730       // consecutive loads).
11731       if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
11732           StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
11733         return false;
11734 
11735       // Find a legal type for the vector store.
11736       unsigned Elts = i + 1;
11737       if (IsVec) {
11738         // When merging vector stores, get the total number of elements.
11739         Elts *= MemVT.getVectorNumElements();
11740       }
11741       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
11742       bool IsFast;
11743       if (TLI.isTypeLegal(Ty) &&
11744           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11745                                  FirstStoreAlign, &IsFast) && IsFast)
11746         NumStoresToMerge = i + 1;
11747     }
11748 
11749     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
11750                                            false, true);
11751   }
11752 
11753   // Below we handle the case of multiple consecutive stores that
11754   // come from multiple consecutive loads. We merge them into a single
11755   // wide load and a single wide store.
11756 
11757   // Look for load nodes which are used by the stored values.
11758   SmallVector<MemOpLink, 8> LoadNodes;
11759 
11760   // Find acceptable loads. Loads need to have the same chain (token factor),
11761   // must not be zext, volatile, indexed, and they must be consecutive.
11762   BaseIndexOffset LdBasePtr;
11763   for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11764     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
11765     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
11766     if (!Ld) break;
11767 
11768     // Loads must only have one use.
11769     if (!Ld->hasNUsesOfValue(1, 0))
11770       break;
11771 
11772     // The memory operands must not be volatile.
11773     if (Ld->isVolatile() || Ld->isIndexed())
11774       break;
11775 
11776     // We do not accept ext loads.
11777     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
11778       break;
11779 
11780     // The stored memory type must be the same.
11781     if (Ld->getMemoryVT() != MemVT)
11782       break;
11783 
11784     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
11785     // If this is not the first ptr that we check.
11786     if (LdBasePtr.Base.getNode()) {
11787       // The base ptr must be the same.
11788       if (!LdPtr.equalBaseIndex(LdBasePtr))
11789         break;
11790     } else {
11791       // Check that all other base pointers are the same as this one.
11792       LdBasePtr = LdPtr;
11793     }
11794 
11795     // We found a potential memory operand to merge.
11796     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
11797   }
11798 
11799   if (LoadNodes.size() < 2)
11800     return false;
11801 
11802   // If we have load/store pair instructions and we only have two values,
11803   // don't bother.
11804   unsigned RequiredAlignment;
11805   if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
11806       St->getAlignment() >= RequiredAlignment)
11807     return false;
11808 
11809   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
11810   unsigned FirstLoadAS = FirstLoad->getAddressSpace();
11811   unsigned FirstLoadAlign = FirstLoad->getAlignment();
11812 
11813   // Scan the memory operations on the chain and find the first non-consecutive
11814   // load memory address. These variables hold the index in the store node
11815   // array.
11816   unsigned LastConsecutiveLoad = 0;
11817   // This variable refers to the size and not index in the array.
11818   unsigned LastLegalVectorType = 0;
11819   unsigned LastLegalIntegerType = 0;
11820   StartAddress = LoadNodes[0].OffsetFromBase;
11821   SDValue FirstChain = FirstLoad->getChain();
11822   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
11823     // All loads must share the same chain.
11824     if (LoadNodes[i].MemNode->getChain() != FirstChain)
11825       break;
11826 
11827     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
11828     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11829       break;
11830     LastConsecutiveLoad = i;
11831     // Find a legal type for the vector store.
11832     EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
11833     bool IsFastSt, IsFastLd;
11834     if (TLI.isTypeLegal(StoreTy) &&
11835         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11836                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11837         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11838                                FirstLoadAlign, &IsFastLd) && IsFastLd) {
11839       LastLegalVectorType = i + 1;
11840     }
11841 
11842     // Find a legal type for the integer store.
11843     unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11844     StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11845     if (TLI.isTypeLegal(StoreTy) &&
11846         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11847                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
11848         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11849                                FirstLoadAlign, &IsFastLd) && IsFastLd)
11850       LastLegalIntegerType = i + 1;
11851     // Or check whether a truncstore and extload is legal.
11852     else if (TLI.getTypeAction(Context, StoreTy) ==
11853              TargetLowering::TypePromoteInteger) {
11854       EVT LegalizedStoredValueTy =
11855         TLI.getTypeToTransformTo(Context, StoreTy);
11856       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11857           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11858           TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11859           TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11860           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11861                                  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
11862           IsFastSt &&
11863           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11864                                  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
11865           IsFastLd)
11866         LastLegalIntegerType = i+1;
11867     }
11868   }
11869 
11870   // Only use vector types if the vector type is larger than the integer type.
11871   // If they are the same, use integers.
11872   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
11873   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
11874 
11875   // We add +1 here because the LastXXX variables refer to location while
11876   // the NumElem refers to array/index size.
11877   unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
11878   NumElem = std::min(LastLegalType, NumElem);
11879 
11880   if (NumElem < 2)
11881     return false;
11882 
11883   // Collect the chains from all merged stores.
11884   SmallVector<SDValue, 8> MergeStoreChains;
11885   MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
11886 
11887   // The latest Node in the DAG.
11888   unsigned LatestNodeUsed = 0;
11889   for (unsigned i=1; i<NumElem; ++i) {
11890     // Find a chain for the new wide-store operand. Notice that some
11891     // of the store nodes that we found may not be selected for inclusion
11892     // in the wide store. The chain we use needs to be the chain of the
11893     // latest store node which is *used* and replaced by the wide store.
11894     if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11895       LatestNodeUsed = i;
11896 
11897     MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
11898   }
11899 
11900   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
11901 
11902   // Find if it is better to use vectors or integers to load and store
11903   // to memory.
11904   EVT JointMemOpVT;
11905   if (UseVectorTy) {
11906     JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
11907   } else {
11908     unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
11909     JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
11910   }
11911 
11912   SDLoc LoadDL(LoadNodes[0].MemNode);
11913   SDLoc StoreDL(StoreNodes[0].MemNode);
11914 
11915   // The merged loads are required to have the same incoming chain, so
11916   // using the first's chain is acceptable.
11917   SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
11918                                 FirstLoad->getBasePtr(),
11919                                 FirstLoad->getPointerInfo(), FirstLoadAlign);
11920 
11921   SDValue NewStoreChain =
11922     DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
11923 
11924   SDValue NewStore =
11925       DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
11926                    FirstInChain->getPointerInfo(), FirstStoreAlign);
11927 
11928   // Transfer chain users from old loads to the new load.
11929   for (unsigned i = 0; i < NumElem; ++i) {
11930     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
11931     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
11932                                   SDValue(NewLoad.getNode(), 1));
11933   }
11934 
11935   if (UseAA) {
11936     // Replace the all stores with the new store.
11937     for (unsigned i = 0; i < NumElem; ++i)
11938       CombineTo(StoreNodes[i].MemNode, NewStore);
11939   } else {
11940     // Replace the last store with the new store.
11941     CombineTo(LatestOp, NewStore);
11942     // Erase all other stores.
11943     for (unsigned i = 0; i < NumElem; ++i) {
11944       // Remove all Store nodes.
11945       if (StoreNodes[i].MemNode == LatestOp)
11946         continue;
11947       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11948       DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
11949       deleteAndRecombine(St);
11950     }
11951   }
11952 
11953   return true;
11954 }
11955 
11956 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
11957   SDLoc SL(ST);
11958   SDValue ReplStore;
11959 
11960   // Replace the chain to avoid dependency.
11961   if (ST->isTruncatingStore()) {
11962     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
11963                                   ST->getBasePtr(), ST->getMemoryVT(),
11964                                   ST->getMemOperand());
11965   } else {
11966     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
11967                              ST->getMemOperand());
11968   }
11969 
11970   // Create token to keep both nodes around.
11971   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
11972                               MVT::Other, ST->getChain(), ReplStore);
11973 
11974   // Make sure the new and old chains are cleaned up.
11975   AddToWorklist(Token.getNode());
11976 
11977   // Don't add users to work list.
11978   return CombineTo(ST, Token, false);
11979 }
11980 
11981 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
11982   SDValue Value = ST->getValue();
11983   if (Value.getOpcode() == ISD::TargetConstantFP)
11984     return SDValue();
11985 
11986   SDLoc DL(ST);
11987 
11988   SDValue Chain = ST->getChain();
11989   SDValue Ptr = ST->getBasePtr();
11990 
11991   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
11992 
11993   // NOTE: If the original store is volatile, this transform must not increase
11994   // the number of stores.  For example, on x86-32 an f64 can be stored in one
11995   // processor operation but an i64 (which is not legal) requires two.  So the
11996   // transform should not be done in this case.
11997 
11998   SDValue Tmp;
11999   switch (CFP->getSimpleValueType(0).SimpleTy) {
12000   default:
12001     llvm_unreachable("Unknown FP type");
12002   case MVT::f16:    // We don't do this for these yet.
12003   case MVT::f80:
12004   case MVT::f128:
12005   case MVT::ppcf128:
12006     return SDValue();
12007   case MVT::f32:
12008     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
12009         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12010       ;
12011       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
12012                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
12013                             MVT::i32);
12014       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
12015     }
12016 
12017     return SDValue();
12018   case MVT::f64:
12019     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
12020          !ST->isVolatile()) ||
12021         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
12022       ;
12023       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
12024                             getZExtValue(), SDLoc(CFP), MVT::i64);
12025       return DAG.getStore(Chain, DL, Tmp,
12026                           Ptr, ST->getMemOperand());
12027     }
12028 
12029     if (!ST->isVolatile() &&
12030         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12031       // Many FP stores are not made apparent until after legalize, e.g. for
12032       // argument passing.  Since this is so common, custom legalize the
12033       // 64-bit integer store into two 32-bit stores.
12034       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
12035       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
12036       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
12037       if (DAG.getDataLayout().isBigEndian())
12038         std::swap(Lo, Hi);
12039 
12040       unsigned Alignment = ST->getAlignment();
12041       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12042       AAMDNodes AAInfo = ST->getAAInfo();
12043 
12044       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12045                                  ST->getAlignment(), MMOFlags, AAInfo);
12046       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12047                         DAG.getConstant(4, DL, Ptr.getValueType()));
12048       Alignment = MinAlign(Alignment, 4U);
12049       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
12050                                  ST->getPointerInfo().getWithOffset(4),
12051                                  Alignment, MMOFlags, AAInfo);
12052       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12053                          St0, St1);
12054     }
12055 
12056     return SDValue();
12057   }
12058 }
12059 
/// Combine entry point for ISD::STORE nodes.  Tries, in order: folding a
/// bitcast of the stored value into the store, deleting stores of undef,
/// inferring better alignment, pairing FP load/store into integer ops,
/// improving the chain via alias analysis, forming indexed stores, narrowing
/// truncating stores, removing dead/noop stores, folding FP_ROUND/TRUNCATE
/// into a truncstore, merging consecutive stores, replacing FP-constant
/// stores with integer stores, splitting bit-merged values, and finally
/// narrowing via ReduceLoadOpStoreWidth.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      // Only do this when the target reports the access at the original
      // alignment is allowed and fast for the new type.
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // getTruncStore may CSE to N itself; only combine if a new node
        // was actually created.
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  // Determine whether alias analysis is enabled: the command-line flag, if
  // given, overrides the subtarget's preference.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  // Debug aid: restrict combiner-AA to a single named function.
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // FIXME: We should do this even without AA enabled. AA will just allow
    // FindBetterChain to work in more situations. The problem with this is that
    // any combine that expects memory operations to be on consecutive chains
    // first needs to be updated to look for users of the same chain.

    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // findBetterNeighborChains may have updated ST's chain in place.
    Chain = ST->getChain();
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store followed by a store with the same value to the same
  // location, then the store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
      // Stop if merging deleted ST itself.
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(Value)) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Try splitting a bit-merged value into two narrower stores.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
12228 
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
///   (store (or (zext (bitcast F to i32) to i64),
///              (shl (zext I to i64), 32)), addr)  -->
///   (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged store can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8},  i32 store --> two i16 stores.
/// For pair of {i8, i8},   i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
///   void goo(const std::pair<int, float> &);
///   hoo() {
///     ...
///     goo(std::make_pair(tmp, ftmp));
///     ...
///   }
///
/// Returns a TokenFactor of the two new stores on success, or an empty
/// SDValue if the pattern does not match or splitting is not profitable.
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
  // Splitting is purely an optimization; skip it entirely at -O0.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SDValue Val = ST->getValue();
  SDLoc DL(ST);

  // Match OR operand.
  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
    return SDValue();

  // Match SHL operand and get Lower and Higher parts of Val.
  // OR is commutative, so the SHL may be either operand.
  SDValue Op1 = Val.getOperand(0);
  SDValue Op2 = Val.getOperand(1);
  SDValue Lo, Hi;
  if (Op1.getOpcode() != ISD::SHL) {
    std::swap(Op1, Op2);
    if (Op1.getOpcode() != ISD::SHL)
      return SDValue();
  }
  Lo = Op2;
  Hi = Op1.getOperand(0);
  // The SHL must feed only this OR, or splitting duplicates work.
  if (!Op1.hasOneUse())
    return SDValue();

  // Match shift amount to HalfValBitSize.
  unsigned HalfValBitSize = Val.getValueType().getSizeInBits() / 2;
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
    return SDValue();

  // Lo and Hi are zero-extended from int with size less equal than 32
  // to i64.
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
      !Lo.getOperand(0).getValueType().isScalarInteger() ||
      Lo.getOperand(0).getValueType().getSizeInBits() > HalfValBitSize ||
      Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
      !Hi.getOperand(0).getValueType().isScalarInteger() ||
      Hi.getOperand(0).getValueType().getSizeInBits() > HalfValBitSize)
    return SDValue();

  // Ask the target whether two narrow stores beat the bit-merge sequence.
  if (!TLI.isMultiStoresCheaperThanBitsMerge(Lo.getOperand(0),
                                             Hi.getOperand(0)))
    return SDValue();

  // Start to split store.
  unsigned Alignment = ST->getAlignment();
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  AAMDNodes AAInfo = ST->getAAInfo();

  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  // Lower value store.
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                             ST->getAlignment(), MMOFlags, AAInfo);
  // Advance the pointer by half the original store width (in bytes).
  Ptr =
      DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
  // Higher value store.
  // NOTE(review): `Alignment / 2` halves the alignment for the offset store;
  // MinAlign(Alignment, HalfValBitSize / 8) would look more precise here —
  // TODO confirm the intended semantics before changing.
  SDValue St1 =
      DAG.getStore(Chain, DL, Hi, Ptr,
                   ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
                   Alignment / 2, MMOFlags, AAInfo);
  // Join the two independent stores (both chain off the original chain).
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1);
}
12327 
/// Combine INSERT_VECTOR_ELT nodes: drop undef insertions, canonicalize
/// chains of insertions by constant index, and fold an insertion into a
/// BUILD_VECTOR (or UNDEF) operand by rebuilding the BUILD_VECTOR.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.isUndef())
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    // UNDEF vector: start from all-undef operands.
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element. Out-of-range constant indices are simply ignored
  // (the original vector is rebuilt unchanged).
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
                DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, dl, Ops);
}
12401 
/// Replace (extract_vector_elt (load ...), EltNo) with a narrow scalar load
/// of just the requested element. Performs the use-replacement itself (both
/// the extract's value and the original load's chain) and returns the
/// (now-replaced) EVE value so the caller knows a combine happened.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  // Callers must have filtered out volatile loads already.
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail out if the element load would need more alignment than the original
  // load provides, or if scalar loads of this type aren't available.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: compute index * store-size at runtime; pointer info
    // stays at the base (offset is unknown).
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Result is same size or narrower: plain load, then truncate or bitcast
    // to the extract's result type.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
12479 
/// Combine EXTRACT_VECTOR_ELT: fold extracts of scalar_to_vector,
/// build_vector, bitcast, insert_vector_elt and vector_shuffle inputs, and
/// narrow extracts of loaded vectors into scalar loads.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->isNullValue() && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // From here on we try to turn the extract into a narrow scalar load.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  // Look through a bitcast to the source vector, remembering whether the
  // element count changed (which would invalidate shuffle-mask reasoning).
  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // Variable-index case: guard against the index depending on the load to
  // avoid creating a cycle in the DAG.
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Re-map the extract index into the selected shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
12687 
12688 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  // Scan the operands: determine the common (pre-extension) source type and
  // whether every extension is an ANY_EXTEND.
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Unused lanes: undef if every extend was ANY_EXTEND, otherwise zero so the
  // result still matches the ZERO_EXTEND semantics.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SDLoc(N), SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // On little-endian the narrow value lands in the lowest sub-element of
    // each wide lane; on big-endian it lands in the highest.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, dl, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
}
12797 
/// Simplify (build_vec (convert x)...) to (convert (build_vec x...)):
/// if every defined operand of the BUILD_VECTOR is the same int-to-float
/// conversion (UINT_TO_FP or SINT_TO_FP) from a common integer type, build
/// an integer vector first and apply one vector conversion instead.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE is used as a sentinel for "no conversion opcode seen
  // yet".
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  // Verify all non-undef operands use the same conversion opcode and the
  // same source type; count the defined (non-undef) operands.
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Just because the floating-point vector type is legal does not necessarily
  // mean that the corresponding integer vector type is.
  if (!isTypeLegal(NVT))
    return SDValue();

  // Collect the pre-conversion integer operands (undef stays undef).
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.isUndef())
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getBuildVector(NVT, dl, Opnds);
  AddToWorklist(BV.getNode());

  // One vector conversion of the rebuilt integer vector.
  return DAG.getNode(Opcode, dl, VT, BV);
}
12868 
12869 // If Vec holds a reference to a non-null node, return Vec.
12870 // Otherwise, return either a zero or an undef node of the appropriate type.
12871 static SDValue getRightHandValue(SelectionDAG &DAG, SDLoc DL, SDValue Vec,
12872                                  EVT VT, bool Zero) {
12873   if (Vec.getNode())
12874     return Vec;
12875 
12876   if (Zero)
12877     return VT.isInteger() ? DAG.getConstant(0, DL, VT)
12878                           : DAG.getConstantFP(0.0, DL, VT);
12879 
12880   return DAG.getUNDEF(VT);
12881 }
12882 
12883 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
12884 // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
12885 // at most two distinct vectors, turn this into a shuffle node.
12886 // TODO: Support more than two inputs by constructing a tree of shuffles.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  SDValue VecIn1, VecIn2;
  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of newly built vector, which input it uses.
  // 0 stands for the zero vector, 1 and 2 for the two input vectors, and -1
  // for undef.
  SmallVector<int, 8> VectorMask;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef()) {
      VectorMask.push_back(-1);
      continue;
    }

    // See if we can combine this into a blend with a zero vector.
    // (Only while the second input slot is still free; the zero vector
    // effectively occupies it below.)
    if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
      UsesZeroVector = true;
      VectorMask.push_back(0);
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with a constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();

    // We only allow up to two distinct input vectors.
    SDValue ExtractedFromVec = Op.getOperand(0);
    if (ExtractedFromVec == VecIn1) {
      VectorMask.push_back(1);
      continue;
    }
    if (ExtractedFromVec == VecIn2) {
      VectorMask.push_back(2);
      continue;
    }

    if (!VecIn1.getNode()) {
      VecIn1 = ExtractedFromVec;
      VectorMask.push_back(1);
    } else if (!VecIn2.getNode() && !UsesZeroVector) {
      VecIn2 = ExtractedFromVec;
      VectorMask.push_back(2);
    } else {
      // Third distinct input (or second input while blending with zero):
      // give up.
      return SDValue();
    }
  }

  // If we didn't find at least one input vector, bail out.
  if (!VecIn1.getNode())
    return SDValue();

  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
  // Shuffle mask indices >= Vec2Offset select from the second operand.
  unsigned Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match.
  if (InVT1 != VT || InVT2 != VT) {
    // Both inputs and the output must have the same base element type.
    EVT ElemType = VT.getVectorElementType();
    if (ElemType != InVT1.getVectorElementType() ||
        ElemType != InVT2.getVectorElementType())
      return SDValue();

    // The element types match, now figure out the lengths.
    if (InVT1.getSizeInBits() * 2 == VT.getSizeInBits() && InVT1 == InVT2) {
      // If both input vectors are exactly half the size of the output, concat
      // them. If we have only one (non-zero) input, concat it with undef.
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
                           getRightHandValue(DAG, dl, VecIn2, InVT1, false));
      VecIn2 = SDValue();
      // If we have one "real" input and are blending with zero, we need the
      // zero elements to come from the second input, not the undef part of the
      // first input.
      if (UsesZeroVector)
        Vec2Offset = NumElems;
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      // If we only have one input vector, and it's twice the size of the
      // output, split it in two.
      if (!TLI.isExtractSubvectorCheap(VT, NumElems))
        return SDValue();

      // TODO: Support the case where we have one input that's too wide, and
      // another input which is wide/"correct"/narrow. We can do this by
      // widening the narrow input, shuffling the wide vectors, and then
      // extracting the low subvector.
      if (UsesZeroVector || VecIn2.getNode())
        return SDValue();

      MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
                           DAG.getConstant(NumElems, dl, IdxTy));
      VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
                           DAG.getConstant(0, dl, IdxTy));
      // Since we now have shorter input vectors, adjust the offset of the
      // second vector's start.
      Vec2Offset = NumElems;
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      return SDValue();
    }
  }

  // Translate VectorMask (which input each lane uses) into a shuffle mask
  // (which source lane each output lane reads).
  SmallVector<int, 8> Mask;

  for (unsigned i = 0; i != NumElems; ++i) {
    if (VectorMask[i] == -1) {
      Mask.push_back(-1);
      continue;
    }

    // If we are trying to blend with zero, we need to take a zero from the
    // correct position in the second input.
    if (VectorMask[i] == 0) {
      Mask.push_back(Vec2Offset + i);
      continue;
    }

    SDValue Extract = N->getOperand(i);
    unsigned ExtIndex =
        cast<ConstantSDNode>(Extract.getOperand(1))->getZExtValue();

    if (VectorMask[i] == 1) {
      Mask.push_back(ExtIndex);
      continue;
    }

    assert(VectorMask[i] == 2 && "Expected input to be from second vector");
    Mask.push_back(Vec2Offset + ExtIndex);
  }

  // Avoid introducing illegal shuffles with zero.
  // TODO: This doesn't actually do anything smart at the moment.
  // We should either delete this, or check legality for all the shuffles
  // we create.
  if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
    return SDValue();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 =
      getRightHandValue(DAG, dl, VecIn2, VecIn1.getValueType(), UsesZeroVector);
  assert(VecIn1.getValueType() == VecIn2.getValueType() &&
         "Unexpected second input type.");

  // Return the new VECTOR_SHUFFLE node.
  SDValue Ops[2];
  Ops[0] = VecIn1;
  Ops[1] = VecIn2;
  return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
}
13055 
13056 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
13057   EVT VT = N->getValueType(0);
13058 
13059   // A vector built entirely of undefs is undef.
13060   if (ISD::allOperandsUndef(N))
13061     return DAG.getUNDEF(VT);
13062 
13063   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
13064     return V;
13065 
13066   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
13067     return V;
13068 
13069   if (SDValue V = reduceBuildVecToShuffle(N))
13070     return V;
13071 
13072   return SDValue();
13073 }
13074 
13075 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
13076   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13077   EVT OpVT = N->getOperand(0).getValueType();
13078 
13079   // If the operands are legal vectors, leave them alone.
13080   if (TLI.isTypeLegal(OpVT))
13081     return SDValue();
13082 
13083   SDLoc DL(N);
13084   EVT VT = N->getValueType(0);
13085   SmallVector<SDValue, 8> Ops;
13086 
13087   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
13088   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13089 
13090   // Keep track of what we encounter.
13091   bool AnyInteger = false;
13092   bool AnyFP = false;
13093   for (const SDValue &Op : N->ops()) {
13094     if (ISD::BITCAST == Op.getOpcode() &&
13095         !Op.getOperand(0).getValueType().isVector())
13096       Ops.push_back(Op.getOperand(0));
13097     else if (ISD::UNDEF == Op.getOpcode())
13098       Ops.push_back(ScalarUndef);
13099     else
13100       return SDValue();
13101 
13102     // Note whether we encounter an integer or floating point scalar.
13103     // If it's neither, bail out, it could be something weird like x86mmx.
13104     EVT LastOpVT = Ops.back().getValueType();
13105     if (LastOpVT.isFloatingPoint())
13106       AnyFP = true;
13107     else if (LastOpVT.isInteger())
13108       AnyInteger = true;
13109     else
13110       return SDValue();
13111   }
13112 
13113   // If any of the operands is a floating point scalar bitcast to a vector,
13114   // use floating point types throughout, and bitcast everything.
13115   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
13116   if (AnyFP) {
13117     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
13118     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13119     if (AnyInteger) {
13120       for (SDValue &Op : Ops) {
13121         if (Op.getValueType() == SVT)
13122           continue;
13123         if (Op.isUndef())
13124           Op = ScalarUndef;
13125         else
13126           Op = DAG.getBitcast(SVT, Op);
13127       }
13128     }
13129   }
13130 
13131   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
13132                                VT.getSizeInBits() / SVT.getSizeInBits());
13133   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
13134 }
13135 
13136 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
13137 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
13138 // most two distinct vectors the same size as the result, attempt to turn this
13139 // into a legal shuffle.
13140 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
13141   EVT VT = N->getValueType(0);
13142   EVT OpVT = N->getOperand(0).getValueType();
13143   int NumElts = VT.getVectorNumElements();
13144   int NumOpElts = OpVT.getVectorNumElements();
13145 
13146   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
13147   SmallVector<int, 8> Mask;
13148 
13149   for (SDValue Op : N->ops()) {
13150     // Peek through any bitcast.
13151     while (Op.getOpcode() == ISD::BITCAST)
13152       Op = Op.getOperand(0);
13153 
13154     // UNDEF nodes convert to UNDEF shuffle mask values.
13155     if (Op.isUndef()) {
13156       Mask.append((unsigned)NumOpElts, -1);
13157       continue;
13158     }
13159 
13160     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13161       return SDValue();
13162 
13163     // What vector are we extracting the subvector from and at what index?
13164     SDValue ExtVec = Op.getOperand(0);
13165 
13166     // We want the EVT of the original extraction to correctly scale the
13167     // extraction index.
13168     EVT ExtVT = ExtVec.getValueType();
13169 
13170     // Peek through any bitcast.
13171     while (ExtVec.getOpcode() == ISD::BITCAST)
13172       ExtVec = ExtVec.getOperand(0);
13173 
13174     // UNDEF nodes convert to UNDEF shuffle mask values.
13175     if (ExtVec.isUndef()) {
13176       Mask.append((unsigned)NumOpElts, -1);
13177       continue;
13178     }
13179 
13180     if (!isa<ConstantSDNode>(Op.getOperand(1)))
13181       return SDValue();
13182     int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
13183 
13184     // Ensure that we are extracting a subvector from a vector the same
13185     // size as the result.
13186     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
13187       return SDValue();
13188 
13189     // Scale the subvector index to account for any bitcast.
13190     int NumExtElts = ExtVT.getVectorNumElements();
13191     if (0 == (NumExtElts % NumElts))
13192       ExtIdx /= (NumExtElts / NumElts);
13193     else if (0 == (NumElts % NumExtElts))
13194       ExtIdx *= (NumElts / NumExtElts);
13195     else
13196       return SDValue();
13197 
13198     // At most we can reference 2 inputs in the final shuffle.
13199     if (SV0.isUndef() || SV0 == ExtVec) {
13200       SV0 = ExtVec;
13201       for (int i = 0; i != NumOpElts; ++i)
13202         Mask.push_back(i + ExtIdx);
13203     } else if (SV1.isUndef() || SV1 == ExtVec) {
13204       SV1 = ExtVec;
13205       for (int i = 0; i != NumOpElts; ++i)
13206         Mask.push_back(i + ExtIdx + NumElts);
13207     } else {
13208       return SDValue();
13209     }
13210   }
13211 
13212   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
13213     return SDValue();
13214 
13215   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
13216                               DAG.getBitcast(VT, SV1), Mask);
13217 }
13218 
// Combine a CONCAT_VECTORS node: identity concat, all-undef, bitcast-scalar
// first operand, BUILD_VECTOR/UNDEF merging, scalar/extract folds, and
// removal of nop concats of identity extract_subvectors.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // SCALAR_TO_VECTOR only makes sense for integer/FP scalars; anything
      // else (e.g. x86mmx) is left alone.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
                                 VT.getSizeInBits() / SclTy.getSizeInBits());
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDLoc dl = SDLoc(N);
      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // The two branches below are mutually exclusive: an operand is either
      // UNDEF or a BUILD_VECTOR (guaranteed by the all_of check above).
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer scalars are truncated down to the common smallest type.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef operands match any source.
    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i must extract from the source at offset i * PartNumElem.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  // Every operand was undef or an identity extract of SingleSource, so the
  // concat simply reproduces SingleSource.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
13370 
13371 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
13372   EVT NVT = N->getValueType(0);
13373   SDValue V = N->getOperand(0);
13374 
13375   if (V->getOpcode() == ISD::CONCAT_VECTORS) {
13376     // Combine:
13377     //    (extract_subvec (concat V1, V2, ...), i)
13378     // Into:
13379     //    Vi if possible
13380     // Only operand 0 is checked as 'concat' assumes all inputs of the same
13381     // type.
13382     if (V->getOperand(0).getValueType() != NVT)
13383       return SDValue();
13384     unsigned Idx = N->getConstantOperandVal(1);
13385     unsigned NumElems = NVT.getVectorNumElements();
13386     assert((Idx % NumElems) == 0 &&
13387            "IDX in concat is not a multiple of the result vector length.");
13388     return V->getOperand(Idx / NumElems);
13389   }
13390 
13391   // Skip bitcasting
13392   if (V->getOpcode() == ISD::BITCAST)
13393     V = V.getOperand(0);
13394 
13395   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
13396     SDLoc dl(N);
13397     // Handle only simple case where vector being inserted and vector
13398     // being extracted are of same type, and are half size of larger vectors.
13399     EVT BigVT = V->getOperand(0).getValueType();
13400     EVT SmallVT = V->getOperand(1).getValueType();
13401     if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
13402       return SDValue();
13403 
13404     // Only handle cases where both indexes are constants with the same type.
13405     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
13406     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
13407 
13408     if (InsIdx && ExtIdx &&
13409         InsIdx->getValueType(0).getSizeInBits() <= 64 &&
13410         ExtIdx->getValueType(0).getSizeInBits() <= 64) {
13411       // Combine:
13412       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
13413       // Into:
13414       //    indices are equal or bit offsets are equal => V1
13415       //    otherwise => (extract_subvec V1, ExtIdx)
13416       if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
13417           ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
13418         return DAG.getBitcast(NVT, V->getOperand(1));
13419       return DAG.getNode(
13420           ISD::EXTRACT_SUBVECTOR, dl, NVT,
13421           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
13422           N->getOperand(1));
13423     }
13424   }
13425 
13426   return SDValue();
13427 }
13428 
13429 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
13430                                                  SDValue V, SelectionDAG &DAG) {
13431   SDLoc DL(V);
13432   EVT VT = V.getValueType();
13433 
13434   switch (V.getOpcode()) {
13435   default:
13436     return V;
13437 
13438   case ISD::CONCAT_VECTORS: {
13439     EVT OpVT = V->getOperand(0).getValueType();
13440     int OpSize = OpVT.getVectorNumElements();
13441     SmallBitVector OpUsedElements(OpSize, false);
13442     bool FoundSimplification = false;
13443     SmallVector<SDValue, 4> NewOps;
13444     NewOps.reserve(V->getNumOperands());
13445     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
13446       SDValue Op = V->getOperand(i);
13447       bool OpUsed = false;
13448       for (int j = 0; j < OpSize; ++j)
13449         if (UsedElements[i * OpSize + j]) {
13450           OpUsedElements[j] = true;
13451           OpUsed = true;
13452         }
13453       NewOps.push_back(
13454           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
13455                  : DAG.getUNDEF(OpVT));
13456       FoundSimplification |= Op == NewOps.back();
13457       OpUsedElements.reset();
13458     }
13459     if (FoundSimplification)
13460       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
13461     return V;
13462   }
13463 
13464   case ISD::INSERT_SUBVECTOR: {
13465     SDValue BaseV = V->getOperand(0);
13466     SDValue SubV = V->getOperand(1);
13467     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
13468     if (!IdxN)
13469       return V;
13470 
13471     int SubSize = SubV.getValueType().getVectorNumElements();
13472     int Idx = IdxN->getZExtValue();
13473     bool SubVectorUsed = false;
13474     SmallBitVector SubUsedElements(SubSize, false);
13475     for (int i = 0; i < SubSize; ++i)
13476       if (UsedElements[i + Idx]) {
13477         SubVectorUsed = true;
13478         SubUsedElements[i] = true;
13479         UsedElements[i + Idx] = false;
13480       }
13481 
13482     // Now recurse on both the base and sub vectors.
13483     SDValue SimplifiedSubV =
13484         SubVectorUsed
13485             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
13486             : DAG.getUNDEF(SubV.getValueType());
13487     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
13488     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
13489       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
13490                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
13491     return V;
13492   }
13493   }
13494 }
13495 
13496 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
13497                                        SDValue N1, SelectionDAG &DAG) {
13498   EVT VT = SVN->getValueType(0);
13499   int NumElts = VT.getVectorNumElements();
13500   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
13501   for (int M : SVN->getMask())
13502     if (M >= 0 && M < NumElts)
13503       N0UsedElements[M] = true;
13504     else if (M >= NumElts)
13505       N1UsedElements[M - NumElts] = true;
13506 
13507   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
13508   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
13509   if (S0 == N0 && S1 == N1)
13510     return SDValue();
13511 
13512   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
13513 }
13514 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
//
// Precondition (enforced by the caller, visitVECTOR_SHUFFLE): N's first
// operand is a CONCAT_VECTORS, and its second is either undef or another
// CONCAT_VECTORS whose operands have the same type as the first's.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  // Size of each concatenated piece, and how many pieces make up the result.
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    // Classify this piece of the mask: fully defined, fully undef, or mixed.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // A fully-defined piece must start on a concat-operand boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and its mask indices must be consecutive, i.e. an unmodified
      // copy of exactly one concat operand.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Pick the source operand: indices below NumElts select from N0's
      // operands, the rest from N1's.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
13578 
13579 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
13580   EVT VT = N->getValueType(0);
13581   unsigned NumElts = VT.getVectorNumElements();
13582 
13583   SDValue N0 = N->getOperand(0);
13584   SDValue N1 = N->getOperand(1);
13585 
13586   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
13587 
13588   // Canonicalize shuffle undef, undef -> undef
13589   if (N0.isUndef() && N1.isUndef())
13590     return DAG.getUNDEF(VT);
13591 
13592   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13593 
13594   // Canonicalize shuffle v, v -> v, undef
13595   if (N0 == N1) {
13596     SmallVector<int, 8> NewMask;
13597     for (unsigned i = 0; i != NumElts; ++i) {
13598       int Idx = SVN->getMaskElt(i);
13599       if (Idx >= (int)NumElts) Idx -= NumElts;
13600       NewMask.push_back(Idx);
13601     }
13602     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
13603   }
13604 
13605   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
13606   if (N0.isUndef())
13607     return DAG.getCommutedVectorShuffle(*SVN);
13608 
13609   // Remove references to rhs if it is undef
13610   if (N1.isUndef()) {
13611     bool Changed = false;
13612     SmallVector<int, 8> NewMask;
13613     for (unsigned i = 0; i != NumElts; ++i) {
13614       int Idx = SVN->getMaskElt(i);
13615       if (Idx >= (int)NumElts) {
13616         Idx = -1;
13617         Changed = true;
13618       }
13619       NewMask.push_back(Idx);
13620     }
13621     if (Changed)
13622       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
13623   }
13624 
13625   // If it is a splat, check if the argument vector is another splat or a
13626   // build_vector.
13627   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
13628     SDNode *V = N0.getNode();
13629 
13630     // If this is a bit convert that changes the element type of the vector but
13631     // not the number of vector elements, look through it.  Be careful not to
13632     // look though conversions that change things like v4f32 to v2f64.
13633     if (V->getOpcode() == ISD::BITCAST) {
13634       SDValue ConvInput = V->getOperand(0);
13635       if (ConvInput.getValueType().isVector() &&
13636           ConvInput.getValueType().getVectorNumElements() == NumElts)
13637         V = ConvInput.getNode();
13638     }
13639 
13640     if (V->getOpcode() == ISD::BUILD_VECTOR) {
13641       assert(V->getNumOperands() == NumElts &&
13642              "BUILD_VECTOR has wrong number of operands");
13643       SDValue Base;
13644       bool AllSame = true;
13645       for (unsigned i = 0; i != NumElts; ++i) {
13646         if (!V->getOperand(i).isUndef()) {
13647           Base = V->getOperand(i);
13648           break;
13649         }
13650       }
13651       // Splat of <u, u, u, u>, return <u, u, u, u>
13652       if (!Base.getNode())
13653         return N0;
13654       for (unsigned i = 0; i != NumElts; ++i) {
13655         if (V->getOperand(i) != Base) {
13656           AllSame = false;
13657           break;
13658         }
13659       }
13660       // Splat of <x, x, x, x>, return <x, x, x, x>
13661       if (AllSame)
13662         return N0;
13663 
13664       // Canonicalize any other splat as a build_vector.
13665       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
13666       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
13667       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
13668 
13669       // We may have jumped through bitcasts, so the type of the
13670       // BUILD_VECTOR may not match the type of the shuffle.
13671       if (V->getValueType(0) != VT)
13672         NewBV = DAG.getBitcast(VT, NewBV);
13673       return NewBV;
13674     }
13675   }
13676 
13677   // There are various patterns used to build up a vector from smaller vectors,
13678   // subvectors, or elements. Scan chains of these and replace unused insertions
13679   // or components with undef.
13680   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
13681     return S;
13682 
13683   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
13684       Level < AfterLegalizeVectorOps &&
13685       (N1.isUndef() ||
13686       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13687        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
13688     if (SDValue V = partitionShuffleOfConcats(N, DAG))
13689       return V;
13690   }
13691 
13692   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
13693   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
13694   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
13695     SmallVector<SDValue, 8> Ops;
13696     for (int M : SVN->getMask()) {
13697       SDValue Op = DAG.getUNDEF(VT.getScalarType());
13698       if (M >= 0) {
13699         int Idx = M % NumElts;
13700         SDValue &S = (M < (int)NumElts ? N0 : N1);
13701         if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
13702           Op = S.getOperand(Idx);
13703         } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
13704           if (Idx == 0)
13705             Op = S.getOperand(0);
13706         } else {
13707           // Operand can't be combined - bail out.
13708           break;
13709         }
13710       }
13711       Ops.push_back(Op);
13712     }
13713     if (Ops.size() == VT.getVectorNumElements()) {
13714       // BUILD_VECTOR requires all inputs to be of the same type, find the
13715       // maximum type and extend them all.
13716       EVT SVT = VT.getScalarType();
13717       if (SVT.isInteger())
13718         for (SDValue &Op : Ops)
13719           SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
13720       if (SVT != VT.getScalarType())
13721         for (SDValue &Op : Ops)
13722           Op = TLI.isZExtFree(Op.getValueType(), SVT)
13723                    ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
13724                    : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
13725       return DAG.getBuildVector(VT, SDLoc(N), Ops);
13726     }
13727   }
13728 
13729   // If this shuffle only has a single input that is a bitcasted shuffle,
13730   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
13731   // back to their original types.
13732   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
13733       N1.isUndef() && Level < AfterLegalizeVectorOps &&
13734       TLI.isTypeLegal(VT)) {
13735 
13736     // Peek through the bitcast only if there is one user.
13737     SDValue BC0 = N0;
13738     while (BC0.getOpcode() == ISD::BITCAST) {
13739       if (!BC0.hasOneUse())
13740         break;
13741       BC0 = BC0.getOperand(0);
13742     }
13743 
13744     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
13745       if (Scale == 1)
13746         return SmallVector<int, 8>(Mask.begin(), Mask.end());
13747 
13748       SmallVector<int, 8> NewMask;
13749       for (int M : Mask)
13750         for (int s = 0; s != Scale; ++s)
13751           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
13752       return NewMask;
13753     };
13754 
13755     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
13756       EVT SVT = VT.getScalarType();
13757       EVT InnerVT = BC0->getValueType(0);
13758       EVT InnerSVT = InnerVT.getScalarType();
13759 
13760       // Determine which shuffle works with the smaller scalar type.
13761       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
13762       EVT ScaleSVT = ScaleVT.getScalarType();
13763 
13764       if (TLI.isTypeLegal(ScaleVT) &&
13765           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
13766           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
13767 
13768         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
13769         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
13770 
13771         // Scale the shuffle masks to the smaller scalar type.
13772         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
13773         SmallVector<int, 8> InnerMask =
13774             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
13775         SmallVector<int, 8> OuterMask =
13776             ScaleShuffleMask(SVN->getMask(), OuterScale);
13777 
13778         // Merge the shuffle masks.
13779         SmallVector<int, 8> NewMask;
13780         for (int M : OuterMask)
13781           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
13782 
13783         // Test for shuffle mask legality over both commutations.
13784         SDValue SV0 = BC0->getOperand(0);
13785         SDValue SV1 = BC0->getOperand(1);
13786         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
13787         if (!LegalMask) {
13788           std::swap(SV0, SV1);
13789           ShuffleVectorSDNode::commuteMask(NewMask);
13790           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
13791         }
13792 
13793         if (LegalMask) {
13794           SV0 = DAG.getBitcast(ScaleVT, SV0);
13795           SV1 = DAG.getBitcast(ScaleVT, SV1);
13796           return DAG.getBitcast(
13797               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
13798         }
13799       }
13800     }
13801   }
13802 
13803   // Canonicalize shuffles according to rules:
13804   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
13805   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
13806   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
13807   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
13808       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
13809       TLI.isTypeLegal(VT)) {
13810     // The incoming shuffle must be of the same type as the result of the
13811     // current shuffle.
13812     assert(N1->getOperand(0).getValueType() == VT &&
13813            "Shuffle types don't match");
13814 
13815     SDValue SV0 = N1->getOperand(0);
13816     SDValue SV1 = N1->getOperand(1);
13817     bool HasSameOp0 = N0 == SV0;
13818     bool IsSV1Undef = SV1.isUndef();
13819     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
13820       // Commute the operands of this shuffle so that next rule
13821       // will trigger.
13822       return DAG.getCommutedVectorShuffle(*SVN);
13823   }
13824 
13825   // Try to fold according to rules:
13826   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
13827   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
13828   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
13829   // Don't try to fold shuffles with illegal type.
13830   // Only fold if this shuffle is the only user of the other shuffle.
13831   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
13832       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
13833     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
13834 
13835     // The incoming shuffle must be of the same type as the result of the
13836     // current shuffle.
13837     assert(OtherSV->getOperand(0).getValueType() == VT &&
13838            "Shuffle types don't match");
13839 
13840     SDValue SV0, SV1;
13841     SmallVector<int, 4> Mask;
13842     // Compute the combined shuffle mask for a shuffle with SV0 as the first
13843     // operand, and SV1 as the second operand.
13844     for (unsigned i = 0; i != NumElts; ++i) {
13845       int Idx = SVN->getMaskElt(i);
13846       if (Idx < 0) {
13847         // Propagate Undef.
13848         Mask.push_back(Idx);
13849         continue;
13850       }
13851 
13852       SDValue CurrentVec;
13853       if (Idx < (int)NumElts) {
13854         // This shuffle index refers to the inner shuffle N0. Lookup the inner
13855         // shuffle mask to identify which vector is actually referenced.
13856         Idx = OtherSV->getMaskElt(Idx);
13857         if (Idx < 0) {
13858           // Propagate Undef.
13859           Mask.push_back(Idx);
13860           continue;
13861         }
13862 
13863         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
13864                                            : OtherSV->getOperand(1);
13865       } else {
13866         // This shuffle index references an element within N1.
13867         CurrentVec = N1;
13868       }
13869 
13870       // Simple case where 'CurrentVec' is UNDEF.
13871       if (CurrentVec.isUndef()) {
13872         Mask.push_back(-1);
13873         continue;
13874       }
13875 
13876       // Canonicalize the shuffle index. We don't know yet if CurrentVec
13877       // will be the first or second operand of the combined shuffle.
13878       Idx = Idx % NumElts;
13879       if (!SV0.getNode() || SV0 == CurrentVec) {
13880         // Ok. CurrentVec is the left hand side.
13881         // Update the mask accordingly.
13882         SV0 = CurrentVec;
13883         Mask.push_back(Idx);
13884         continue;
13885       }
13886 
13887       // Bail out if we cannot convert the shuffle pair into a single shuffle.
13888       if (SV1.getNode() && SV1 != CurrentVec)
13889         return SDValue();
13890 
13891       // Ok. CurrentVec is the right hand side.
13892       // Update the mask accordingly.
13893       SV1 = CurrentVec;
13894       Mask.push_back(Idx + NumElts);
13895     }
13896 
13897     // Check if all indices in Mask are Undef. In case, propagate Undef.
13898     bool isUndefMask = true;
13899     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
13900       isUndefMask &= Mask[i] < 0;
13901 
13902     if (isUndefMask)
13903       return DAG.getUNDEF(VT);
13904 
13905     if (!SV0.getNode())
13906       SV0 = DAG.getUNDEF(VT);
13907     if (!SV1.getNode())
13908       SV1 = DAG.getUNDEF(VT);
13909 
13910     // Avoid introducing shuffles with illegal mask.
13911     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
13912       ShuffleVectorSDNode::commuteMask(Mask);
13913 
13914       if (!TLI.isShuffleMaskLegal(Mask, VT))
13915         return SDValue();
13916 
13917       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
13918       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
13919       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
13920       std::swap(SV0, SV1);
13921     }
13922 
13923     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
13924     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
13925     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
13926     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
13927   }
13928 
13929   return SDValue();
13930 }
13931 
13932 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
13933   SDValue InVal = N->getOperand(0);
13934   EVT VT = N->getValueType(0);
13935 
13936   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
13937   // with a VECTOR_SHUFFLE.
13938   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13939     SDValue InVec = InVal->getOperand(0);
13940     SDValue EltNo = InVal->getOperand(1);
13941 
13942     // FIXME: We could support implicit truncation if the shuffle can be
13943     // scaled to a smaller vector scalar type.
13944     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
13945     if (C0 && VT == InVec.getValueType() &&
13946         VT.getScalarType() == InVal.getValueType()) {
13947       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
13948       int Elt = C0->getZExtValue();
13949       NewMask[0] = Elt;
13950 
13951       if (TLI.isShuffleMaskLegal(NewMask, VT))
13952         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
13953                                     NewMask);
13954     }
13955   }
13956 
13957   return SDValue();
13958 }
13959 
13960 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
13961   EVT VT = N->getValueType(0);
13962   SDValue N0 = N->getOperand(0);
13963   SDValue N1 = N->getOperand(1);
13964   SDValue N2 = N->getOperand(2);
13965 
13966   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
13967   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
13968   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
13969   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
13970       N0.getOperand(1).getValueType() == N1.getValueType() &&
13971       N0.getOperand(2) == N2)
13972     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
13973                        N1, N2);
13974 
13975   if (N0.getValueType() != N1.getValueType())
13976     return SDValue();
13977 
13978   // If the input vector is a concatenation, and the insert replaces
13979   // one of the halves, we can optimize into a single concat_vectors.
13980   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
13981       N2.getOpcode() == ISD::Constant) {
13982     APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
13983 
13984     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
13985     // (concat_vectors Z, Y)
13986     if (InsIdx == 0)
13987       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
13988                          N0.getOperand(1));
13989 
13990     // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
13991     // (concat_vectors X, Z)
13992     if (InsIdx == VT.getVectorNumElements() / 2)
13993       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
13994                          N1);
13995   }
13996 
13997   return SDValue();
13998 }
13999 
14000 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
14001   SDValue N0 = N->getOperand(0);
14002 
14003   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
14004   if (N0->getOpcode() == ISD::FP16_TO_FP)
14005     return N0->getOperand(0);
14006 
14007   return SDValue();
14008 }
14009 
14010 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
14011   SDValue N0 = N->getOperand(0);
14012 
14013   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
14014   if (N0->getOpcode() == ISD::AND) {
14015     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
14016     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
14017       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
14018                          N0.getOperand(0));
14019     }
14020   }
14021 
14022   return SDValue();
14023 }
14024 
14025 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
14026 /// with the destination vector and a zero vector.
14027 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
14028 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc dl(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  // Look through a bitcast of the constant mask operand.
  if (RHS.getOpcode() == ISD::BITCAST)
    RHS = RHS.getOperand(0);

  // The mask must be a build_vector of constants for us to analyze it.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  auto BuildClearMask = [&](int Split) {
    // Each original element is viewed as 'Split' consecutive sub-elements of
    // NumSubBits bits each.
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // An undef mask element can map to an undef shuffle lane.
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      // Only integer and FP build_vector constants can be analyzed; any
      // other operand kind aborts this split level.
      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets sub-element 0 holds the most-significant bits.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits = Bits.lshr(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the LHS sub-element; all-zeros selects the matching
      // lane of the zero vector (second shuffle operand). Any other pattern
      // cannot be expressed as a clear mask.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, dl, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try whole-element masks first, then progressively finer splits down to
  // byte granularity, taking the first legal clear mask found.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
14119 
14120 /// Visit a binary vector operation, like ADD.
14121 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
14122   assert(N->getValueType(0).isVector() &&
14123          "SimplifyVBinOp only works on vectors!");
14124 
14125   SDValue LHS = N->getOperand(0);
14126   SDValue RHS = N->getOperand(1);
14127   SDValue Ops[] = {LHS, RHS};
14128 
14129   // See if we can constant fold the vector operation.
14130   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
14131           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
14132     return Fold;
14133 
14134   // Try to convert a constant mask AND into a shuffle clear mask.
14135   if (SDValue Shuffle = XformToShuffleWithZero(N))
14136     return Shuffle;
14137 
14138   // Type legalization might introduce new shuffles in the DAG.
14139   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
14140   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
14141   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
14142       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
14143       LHS.getOperand(1).isUndef() &&
14144       RHS.getOperand(1).isUndef()) {
14145     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
14146     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
14147 
14148     if (SVN0->getMask().equals(SVN1->getMask())) {
14149       EVT VT = N->getValueType(0);
14150       SDValue UndefVector = LHS.getOperand(1);
14151       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
14152                                      LHS.getOperand(0), RHS.getOperand(0),
14153                                      N->getFlags());
14154       AddUsersToWorklist(N);
14155       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
14156                                   SVN0->getMask());
14157     }
14158   }
14159 
14160   return SDValue();
14161 }
14162 
14163 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
14164                                     SDValue N2) {
14165   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
14166 
14167   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
14168                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
14169 
14170   // If we got a simplified select_cc node back from SimplifySelectCC, then
14171   // break it down into a new SETCC node, and a new SELECT node, and then return
14172   // the SELECT node, since we were called with a SELECT node.
14173   if (SCC.getNode()) {
14174     // Check to see if we got a select_cc back (to turn into setcc/select).
14175     // Otherwise, just return whatever node we got back, like fabs.
14176     if (SCC.getOpcode() == ISD::SELECT_CC) {
14177       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
14178                                   N0.getValueType(),
14179                                   SCC.getOperand(0), SCC.getOperand(1),
14180                                   SCC.getOperand(4));
14181       AddToWorklist(SETCC.getNode());
14182       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
14183                            SCC.getOperand(2), SCC.getOperand(3));
14184     }
14185 
14186     return SCC;
14187   }
14188   return SDValue();
14189 }
14190 
14191 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
14192 /// being selected between, see if we can simplify the select.  Callers of this
14193 /// should assume that TheSelect is deleted if this returns true.  As such, they
14194 /// should return the appropriate thing (e.g. the node) back to the top-level of
14195 /// the DAG combiner loop to avoid it being looked at.
14196 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
14197                                     SDValue RHS) {
14198 
14199   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14200   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
14201   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
14202     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
14203       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
14204       SDValue Sqrt = RHS;
14205       ISD::CondCode CC;
14206       SDValue CmpLHS;
14207       const ConstantFPSDNode *Zero = nullptr;
14208 
14209       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
14210         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
14211         CmpLHS = TheSelect->getOperand(0);
14212         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
14213       } else {
14214         // SELECT or VSELECT
14215         SDValue Cmp = TheSelect->getOperand(0);
14216         if (Cmp.getOpcode() == ISD::SETCC) {
14217           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
14218           CmpLHS = Cmp.getOperand(0);
14219           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
14220         }
14221       }
14222       if (Zero && Zero->isZero() &&
14223           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
14224           CC == ISD::SETULT || CC == ISD::SETLT)) {
14225         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14226         CombineTo(TheSelect, Sqrt);
14227         return true;
14228       }
14229     }
14230   }
14231   // Cannot simplify select with vector condition
14232   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
14233 
14234   // If this is a select from two identical things, try to pull the operation
14235   // through the select.
14236   if (LHS.getOpcode() != RHS.getOpcode() ||
14237       !LHS.hasOneUse() || !RHS.hasOneUse())
14238     return false;
14239 
14240   // If this is a load and the token chain is identical, replace the select
14241   // of two loads with a load through a select of the address to load from.
14242   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
14243   // constants have been dropped into the constant pool.
14244   if (LHS.getOpcode() == ISD::LOAD) {
14245     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
14246     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
14247 
14248     // Token chains must be identical.
14249     if (LHS.getOperand(0) != RHS.getOperand(0) ||
14250         // Do not let this transformation reduce the number of volatile loads.
14251         LLD->isVolatile() || RLD->isVolatile() ||
14252         // FIXME: If either is a pre/post inc/dec load,
14253         // we'd need to split out the address adjustment.
14254         LLD->isIndexed() || RLD->isIndexed() ||
14255         // If this is an EXTLOAD, the VT's must match.
14256         LLD->getMemoryVT() != RLD->getMemoryVT() ||
14257         // If this is an EXTLOAD, the kind of extension must match.
14258         (LLD->getExtensionType() != RLD->getExtensionType() &&
14259          // The only exception is if one of the extensions is anyext.
14260          LLD->getExtensionType() != ISD::EXTLOAD &&
14261          RLD->getExtensionType() != ISD::EXTLOAD) ||
14262         // FIXME: this discards src value information.  This is
14263         // over-conservative. It would be beneficial to be able to remember
14264         // both potential memory locations.  Since we are discarding
14265         // src value info, don't do the transformation if the memory
14266         // locations are not in the default address space.
14267         LLD->getPointerInfo().getAddrSpace() != 0 ||
14268         RLD->getPointerInfo().getAddrSpace() != 0 ||
14269         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
14270                                       LLD->getBasePtr().getValueType()))
14271       return false;
14272 
14273     // Check that the select condition doesn't reach either load.  If so,
14274     // folding this will induce a cycle into the DAG.  If not, this is safe to
14275     // xform, so create a select of the addresses.
14276     SDValue Addr;
14277     if (TheSelect->getOpcode() == ISD::SELECT) {
14278       SDNode *CondNode = TheSelect->getOperand(0).getNode();
14279       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
14280           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
14281         return false;
14282       // The loads must not depend on one another.
14283       if (LLD->isPredecessorOf(RLD) ||
14284           RLD->isPredecessorOf(LLD))
14285         return false;
14286       Addr = DAG.getSelect(SDLoc(TheSelect),
14287                            LLD->getBasePtr().getValueType(),
14288                            TheSelect->getOperand(0), LLD->getBasePtr(),
14289                            RLD->getBasePtr());
14290     } else {  // Otherwise SELECT_CC
14291       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
14292       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
14293 
14294       if ((LLD->hasAnyUseOfValue(1) &&
14295            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
14296           (RLD->hasAnyUseOfValue(1) &&
14297            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
14298         return false;
14299 
14300       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
14301                          LLD->getBasePtr().getValueType(),
14302                          TheSelect->getOperand(0),
14303                          TheSelect->getOperand(1),
14304                          LLD->getBasePtr(), RLD->getBasePtr(),
14305                          TheSelect->getOperand(4));
14306     }
14307 
14308     SDValue Load;
14309     // It is safe to replace the two loads if they have different alignments,
14310     // but the new load must be the minimum (most restrictive) alignment of the
14311     // inputs.
14312     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
14313     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
14314     if (!RLD->isInvariant())
14315       MMOFlags &= ~MachineMemOperand::MOInvariant;
14316     if (!RLD->isDereferenceable())
14317       MMOFlags &= ~MachineMemOperand::MODereferenceable;
14318     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
14319       // FIXME: Discards pointer and AA info.
14320       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
14321                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
14322                          MMOFlags);
14323     } else {
14324       // FIXME: Discards pointer and AA info.
14325       Load = DAG.getExtLoad(
14326           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
14327                                                   : LLD->getExtensionType(),
14328           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
14329           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
14330     }
14331 
14332     // Users of the select now use the result of the load.
14333     CombineTo(TheSelect, Load);
14334 
14335     // Users of the old loads now use the new load's chain.  We know the
14336     // old-load value is dead now.
14337     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
14338     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
14339     return true;
14340   }
14341 
14342   return false;
14343 }
14344 
14345 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
14346 /// where 'cond' is the comparison specified by CC.
14347 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
14348                                       SDValue N2, SDValue N3, ISD::CondCode CC,
14349                                       bool NotExtCompare) {
14350   // (x ? y : y) -> y.
14351   if (N2 == N3) return N2;
14352 
14353   EVT VT = N2.getValueType();
14354   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
14355   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
14356 
14357   // Determine if the condition we're dealing with is constant
14358   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
14359                               N0, N1, CC, DL, false);
14360   if (SCC.getNode()) AddToWorklist(SCC.getNode());
14361 
14362   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
14363     // fold select_cc true, x, y -> x
14364     // fold select_cc false, x, y -> y
14365     return !SCCC->isNullValue() ? N2 : N3;
14366   }
14367 
14368   // Check to see if we can simplify the select into an fabs node
14369   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14370     // Allow either -0.0 or 0.0
14371     if (CFP->isZero()) {
14372       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
14373       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
14374           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
14375           N2 == N3.getOperand(0))
14376         return DAG.getNode(ISD::FABS, DL, VT, N0);
14377 
14378       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
14379       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
14380           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
14381           N2.getOperand(0) == N3)
14382         return DAG.getNode(ISD::FABS, DL, VT, N3);
14383     }
14384   }
14385 
14386   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
14387   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
14388   // in it.  This is a win when the constant is not otherwise available because
14389   // it replaces two constant pool loads with one.  We only do this if the FP
14390   // type is known to be legal, because if it isn't, then we are before legalize
14391   // types an we want the other legalization to happen first (e.g. to avoid
14392   // messing with soft float) and if the ConstantFP is not legal, because if
14393   // it is legal, we may not need to store the FP constant in a constant pool.
14394   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
14395     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
14396       if (TLI.isTypeLegal(N2.getValueType()) &&
14397           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
14398                TargetLowering::Legal &&
14399            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
14400            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
14401           // If both constants have multiple uses, then we won't need to do an
14402           // extra load, they are likely around in registers for other users.
14403           (TV->hasOneUse() || FV->hasOneUse())) {
14404         Constant *Elts[] = {
14405           const_cast<ConstantFP*>(FV->getConstantFPValue()),
14406           const_cast<ConstantFP*>(TV->getConstantFPValue())
14407         };
14408         Type *FPTy = Elts[0]->getType();
14409         const DataLayout &TD = DAG.getDataLayout();
14410 
14411         // Create a ConstantArray of the two constants.
14412         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
14413         SDValue CPIdx =
14414             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
14415                                 TD.getPrefTypeAlignment(FPTy));
14416         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
14417 
14418         // Get the offsets to the 0 and 1 element of the array so that we can
14419         // select between them.
14420         SDValue Zero = DAG.getIntPtrConstant(0, DL);
14421         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
14422         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
14423 
14424         SDValue Cond = DAG.getSetCC(DL,
14425                                     getSetCCResultType(N0.getValueType()),
14426                                     N0, N1, CC);
14427         AddToWorklist(Cond.getNode());
14428         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
14429                                           Cond, One, Zero);
14430         AddToWorklist(CstOffset.getNode());
14431         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
14432                             CstOffset);
14433         AddToWorklist(CPIdx.getNode());
14434         return DAG.getLoad(
14435             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
14436             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
14437             Alignment);
14438       }
14439     }
14440 
14441   // Check to see if we can perform the "gzip trick", transforming
14442   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
14443   if (isNullConstant(N3) && CC == ISD::SETLT &&
14444       (isNullConstant(N1) ||                 // (a < 0) ? b : 0
14445        (isOneConstant(N1) && N0 == N2))) {   // (a < 1) ? a : 0
14446     EVT XType = N0.getValueType();
14447     EVT AType = N2.getValueType();
14448     if (XType.bitsGE(AType)) {
14449       // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
14450       // single-bit constant.
14451       if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
14452         unsigned ShCtV = N2C->getAPIntValue().logBase2();
14453         ShCtV = XType.getSizeInBits() - ShCtV - 1;
14454         SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
14455                                        getShiftAmountTy(N0.getValueType()));
14456         SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
14457                                     XType, N0, ShCt);
14458         AddToWorklist(Shift.getNode());
14459 
14460         if (XType.bitsGT(AType)) {
14461           Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
14462           AddToWorklist(Shift.getNode());
14463         }
14464 
14465         return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
14466       }
14467 
14468       SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
14469                                   XType, N0,
14470                                   DAG.getConstant(XType.getSizeInBits() - 1,
14471                                                   SDLoc(N0),
14472                                          getShiftAmountTy(N0.getValueType())));
14473       AddToWorklist(Shift.getNode());
14474 
14475       if (XType.bitsGT(AType)) {
14476         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
14477         AddToWorklist(Shift.getNode());
14478       }
14479 
14480       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
14481     }
14482   }
14483 
14484   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
14485   // where y is has a single bit set.
14486   // A plaintext description would be, we can turn the SELECT_CC into an AND
14487   // when the condition can be materialized as an all-ones register.  Any
14488   // single bit-test can be materialized as an all-ones register with
14489   // shift-left and shift-right-arith.
14490   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
14491       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
14492     SDValue AndLHS = N0->getOperand(0);
14493     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14494     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
14495       // Shift the tested bit over the sign bit.
14496       const APInt &AndMask = ConstAndRHS->getAPIntValue();
14497       SDValue ShlAmt =
14498         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
14499                         getShiftAmountTy(AndLHS.getValueType()));
14500       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
14501 
14502       // Now arithmetic right shift it all the way over, so the result is either
14503       // all-ones, or zero.
14504       SDValue ShrAmt =
14505         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
14506                         getShiftAmountTy(Shl.getValueType()));
14507       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
14508 
14509       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
14510     }
14511   }
14512 
14513   // fold select C, 16, 0 -> shl C, 4
14514   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
14515       TLI.getBooleanContents(N0.getValueType()) ==
14516           TargetLowering::ZeroOrOneBooleanContent) {
14517 
14518     // If the caller doesn't want us to simplify this into a zext of a compare,
14519     // don't do it.
14520     if (NotExtCompare && N2C->isOne())
14521       return SDValue();
14522 
14523     // Get a SetCC of the condition
14524     // NOTE: Don't create a SETCC if it's not legal on this target.
14525     if (!LegalOperations ||
14526         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
14527       SDValue Temp, SCC;
14528       // cast from setcc result type to select result type
14529       if (LegalTypes) {
14530         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
14531                             N0, N1, CC);
14532         if (N2.getValueType().bitsLT(SCC.getValueType()))
14533           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
14534                                         N2.getValueType());
14535         else
14536           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
14537                              N2.getValueType(), SCC);
14538       } else {
14539         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
14540         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
14541                            N2.getValueType(), SCC);
14542       }
14543 
14544       AddToWorklist(SCC.getNode());
14545       AddToWorklist(Temp.getNode());
14546 
14547       if (N2C->isOne())
14548         return Temp;
14549 
14550       // shl setcc result by log2 n2c
14551       return DAG.getNode(
14552           ISD::SHL, DL, N2.getValueType(), Temp,
14553           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
14554                           getShiftAmountTy(Temp.getValueType())));
14555     }
14556   }
14557 
14558   // Check to see if this is an integer abs.
14559   // select_cc setg[te] X,  0,  X, -X ->
14560   // select_cc setgt    X, -1,  X, -X ->
14561   // select_cc setl[te] X,  0, -X,  X ->
14562   // select_cc setlt    X,  1, -X,  X ->
14563   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
14564   if (N1C) {
14565     ConstantSDNode *SubC = nullptr;
14566     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
14567          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
14568         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
14569       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
14570     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
14571               (N1C->isOne() && CC == ISD::SETLT)) &&
14572              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
14573       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
14574 
14575     EVT XType = N0.getValueType();
14576     if (SubC && SubC->isNullValue() && XType.isInteger()) {
14577       SDLoc DL(N0);
14578       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
14579                                   N0,
14580                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
14581                                          getShiftAmountTy(N0.getValueType())));
14582       SDValue Add = DAG.getNode(ISD::ADD, DL,
14583                                 XType, N0, Shift);
14584       AddToWorklist(Shift.getNode());
14585       AddToWorklist(Add.getNode());
14586       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
14587     }
14588   }
14589 
14590   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
14591   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
14592   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
14593   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
14594   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
14595   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
14596   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
14597   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
14598   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
14599     SDValue ValueOnZero = N2;
14600     SDValue Count = N3;
14601     // If the condition is NE instead of E, swap the operands.
14602     if (CC == ISD::SETNE)
14603       std::swap(ValueOnZero, Count);
14604     // Check if the value on zero is a constant equal to the bits in the type.
14605     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
14606       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
14607         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
14608         // legal, combine to just cttz.
14609         if ((Count.getOpcode() == ISD::CTTZ ||
14610              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
14611             N0 == Count.getOperand(0) &&
14612             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
14613           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
14614         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
14615         // legal, combine to just ctlz.
14616         if ((Count.getOpcode() == ISD::CTLZ ||
14617              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
14618             N0 == Count.getOperand(0) &&
14619             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
14620           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
14621       }
14622     }
14623   }
14624 
14625   return SDValue();
14626 }
14627 
14628 /// This is a stub for TargetLowering::SimplifySetCC.
14629 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
14630                                    ISD::CondCode Cond, const SDLoc &DL,
14631                                    bool foldBooleans) {
14632   TargetLowering::DAGCombinerInfo
14633     DagCombineInfo(DAG, Level, false, this);
14634   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
14635 }
14636 
14637 /// Given an ISD::SDIV node expressing a divide by constant, return
14638 /// a DAG expression to select that will generate the same value by multiplying
14639 /// by a magic number.
14640 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14641 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
14642   // when optimising for minimum size, we don't want to expand a div to a mul
14643   // and a shift.
14644   if (DAG.getMachineFunction().getFunction()->optForMinSize())
14645     return SDValue();
14646 
14647   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14648   if (!C)
14649     return SDValue();
14650 
14651   // Avoid division by zero.
14652   if (C->isNullValue())
14653     return SDValue();
14654 
14655   std::vector<SDNode*> Built;
14656   SDValue S =
14657       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14658 
14659   for (SDNode *N : Built)
14660     AddToWorklist(N);
14661   return S;
14662 }
14663 
14664 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
14665 /// DAG expression that will generate the same value by right shifting.
14666 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
14667   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14668   if (!C)
14669     return SDValue();
14670 
14671   // Avoid division by zero.
14672   if (C->isNullValue())
14673     return SDValue();
14674 
14675   std::vector<SDNode *> Built;
14676   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
14677 
14678   for (SDNode *N : Built)
14679     AddToWorklist(N);
14680   return S;
14681 }
14682 
14683 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
14684 /// expression that will generate the same value by multiplying by a magic
14685 /// number.
14686 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
14687 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
14688   // when optimising for minimum size, we don't want to expand a div to a mul
14689   // and a shift.
14690   if (DAG.getMachineFunction().getFunction()->optForMinSize())
14691     return SDValue();
14692 
14693   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14694   if (!C)
14695     return SDValue();
14696 
14697   // Avoid division by zero.
14698   if (C->isNullValue())
14699     return SDValue();
14700 
14701   std::vector<SDNode*> Built;
14702   SDValue S =
14703       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14704 
14705   for (SDNode *N : Built)
14706     AddToWorklist(N);
14707   return S;
14708 }
14709 
14710 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
14711   if (Level >= AfterLegalizeDAG)
14712     return SDValue();
14713 
14714   // Expose the DAG combiner to the target combiner implementations.
14715   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
14716 
14717   unsigned Iterations = 0;
14718   if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
14719     if (Iterations) {
14720       // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14721       // For the reciprocal, we need to find the zero of the function:
14722       //   F(X) = A X - 1 [which has a zero at X = 1/A]
14723       //     =>
14724       //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
14725       //     does not require additional intermediate precision]
14726       EVT VT = Op.getValueType();
14727       SDLoc DL(Op);
14728       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14729 
14730       AddToWorklist(Est.getNode());
14731 
14732       // Newton iterations: Est = Est + Est (1 - Arg * Est)
14733       for (unsigned i = 0; i < Iterations; ++i) {
14734         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
14735         AddToWorklist(NewEst.getNode());
14736 
14737         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
14738         AddToWorklist(NewEst.getNode());
14739 
14740         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14741         AddToWorklist(NewEst.getNode());
14742 
14743         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
14744         AddToWorklist(Est.getNode());
14745       }
14746     }
14747     return Est;
14748   }
14749 
14750   return SDValue();
14751 }
14752 
14753 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14754 /// For the reciprocal sqrt, we need to find the zero of the function:
14755 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14756 ///     =>
14757 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
14758 /// As a result, we precompute A/2 prior to the iteration loop.
14759 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
14760                                          unsigned Iterations,
14761                                          SDNodeFlags *Flags, bool Reciprocal) {
14762   EVT VT = Arg.getValueType();
14763   SDLoc DL(Arg);
14764   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
14765 
14766   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
14767   // this entire sequence requires only one FP constant.
14768   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
14769   AddToWorklist(HalfArg.getNode());
14770 
14771   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
14772   AddToWorklist(HalfArg.getNode());
14773 
14774   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
14775   for (unsigned i = 0; i < Iterations; ++i) {
14776     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
14777     AddToWorklist(NewEst.getNode());
14778 
14779     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
14780     AddToWorklist(NewEst.getNode());
14781 
14782     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
14783     AddToWorklist(NewEst.getNode());
14784 
14785     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
14786     AddToWorklist(Est.getNode());
14787   }
14788 
14789   // If non-reciprocal square root is requested, multiply the result by Arg.
14790   if (!Reciprocal) {
14791     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
14792     AddToWorklist(Est.getNode());
14793   }
14794 
14795   return Est;
14796 }
14797 
14798 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
14799 /// For the reciprocal sqrt, we need to find the zero of the function:
14800 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
14801 ///     =>
14802 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
14803 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
14804                                          unsigned Iterations,
14805                                          SDNodeFlags *Flags, bool Reciprocal) {
14806   EVT VT = Arg.getValueType();
14807   SDLoc DL(Arg);
14808   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
14809   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
14810 
14811   // This routine must enter the loop below to work correctly
14812   // when (Reciprocal == false).
14813   assert(Iterations > 0);
14814 
14815   // Newton iterations for reciprocal square root:
14816   // E = (E * -0.5) * ((A * E) * E + -3.0)
14817   for (unsigned i = 0; i < Iterations; ++i) {
14818     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
14819     AddToWorklist(AE.getNode());
14820 
14821     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
14822     AddToWorklist(AEE.getNode());
14823 
14824     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
14825     AddToWorklist(RHS.getNode());
14826 
14827     // When calculating a square root at the last iteration build:
14828     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
14829     // (notice a common subexpression)
14830     SDValue LHS;
14831     if (Reciprocal || (i + 1) < Iterations) {
14832       // RSQRT: LHS = (E * -0.5)
14833       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
14834     } else {
14835       // SQRT: LHS = (A * E) * -0.5
14836       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
14837     }
14838     AddToWorklist(LHS.getNode());
14839 
14840     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
14841     AddToWorklist(Est.getNode());
14842   }
14843 
14844   return Est;
14845 }
14846 
14847 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
14848 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
14849 /// Op can be zero.
14850 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
14851                                            bool Reciprocal) {
14852   if (Level >= AfterLegalizeDAG)
14853     return SDValue();
14854 
14855   // Expose the DAG combiner to the target combiner implementations.
14856   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
14857   unsigned Iterations = 0;
14858   bool UseOneConstNR = false;
14859   if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
14860     AddToWorklist(Est.getNode());
14861     if (Iterations) {
14862       Est = UseOneConstNR
14863                 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
14864                 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
14865     }
14866     return Est;
14867   }
14868 
14869   return SDValue();
14870 }
14871 
14872 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
14873   return buildSqrtEstimateImpl(Op, Flags, true);
14874 }
14875 
14876 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
14877   SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
14878   if (!Est)
14879     return SDValue();
14880 
14881   // Unfortunately, Est is now NaN if the input was exactly 0.
14882   // Select out this case and force the answer to 0.
14883   EVT VT = Est.getValueType();
14884   SDLoc DL(Op);
14885   SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
14886   EVT CCVT = getSetCCResultType(VT);
14887   SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
14888   AddToWorklist(ZeroCmp.getNode());
14889 
14890   Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
14891                     Zero, Est);
14892   AddToWorklist(Est.getNode());
14893   return Est;
14894 }
14895 
14896 /// Return true if base is a frame index, which is known not to alias with
14897 /// anything but itself.  Provides base object and offset as results.
14898 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
14899                            const GlobalValue *&GV, const void *&CV) {
14900   // Assume it is a primitive operation.
14901   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
14902 
14903   // If it's an adding a simple constant then integrate the offset.
14904   if (Base.getOpcode() == ISD::ADD) {
14905     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
14906       Base = Base.getOperand(0);
14907       Offset += C->getZExtValue();
14908     }
14909   }
14910 
14911   // Return the underlying GlobalValue, and update the Offset.  Return false
14912   // for GlobalAddressSDNode since the same GlobalAddress may be represented
14913   // by multiple nodes with different offsets.
14914   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
14915     GV = G->getGlobal();
14916     Offset += G->getOffset();
14917     return false;
14918   }
14919 
14920   // Return the underlying Constant value, and update the Offset.  Return false
14921   // for ConstantSDNodes since the same constant pool entry may be represented
14922   // by multiple nodes with different offsets.
14923   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
14924     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
14925                                          : (const void *)C->getConstVal();
14926     Offset += C->getOffset();
14927     return false;
14928   }
14929   // If it's any of the following then it can't alias with anything but itself.
14930   return isa<FrameIndexSDNode>(Base);
14931 }
14932 
14933 /// Return true if there is any possibility that the two addresses overlap.
14934 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
14935   // If they are the same then they must be aliases.
14936   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
14937 
14938   // If they are both volatile then they cannot be reordered.
14939   if (Op0->isVolatile() && Op1->isVolatile()) return true;
14940 
14941   // If one operation reads from invariant memory, and the other may store, they
14942   // cannot alias. These should really be checking the equivalent of mayWrite,
14943   // but it only matters for memory nodes other than load /store.
14944   if (Op0->isInvariant() && Op1->writeMem())
14945     return false;
14946 
14947   if (Op1->isInvariant() && Op0->writeMem())
14948     return false;
14949 
14950   // Gather base node and offset information.
14951   SDValue Base1, Base2;
14952   int64_t Offset1, Offset2;
14953   const GlobalValue *GV1, *GV2;
14954   const void *CV1, *CV2;
14955   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
14956                                       Base1, Offset1, GV1, CV1);
14957   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
14958                                       Base2, Offset2, GV2, CV2);
14959 
14960   // If they have a same base address then check to see if they overlap.
14961   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
14962     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
14963              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
14964 
14965   // It is possible for different frame indices to alias each other, mostly
14966   // when tail call optimization reuses return address slots for arguments.
14967   // To catch this case, look up the actual index of frame indices to compute
14968   // the real alias relationship.
14969   if (isFrameIndex1 && isFrameIndex2) {
14970     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14971     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
14972     Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
14973     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
14974              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
14975   }
14976 
14977   // Otherwise, if we know what the bases are, and they aren't identical, then
14978   // we know they cannot alias.
14979   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
14980     return false;
14981 
14982   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
14983   // compared to the size and offset of the access, we may be able to prove they
14984   // do not alias.  This check is conservative for now to catch cases created by
14985   // splitting vector types.
14986   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
14987       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
14988       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
14989        Op1->getMemoryVT().getSizeInBits() >> 3) &&
14990       (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
14991     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
14992     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
14993 
14994     // There is no overlap between these relatively aligned accesses of similar
14995     // size, return no alias.
14996     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
14997         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
14998       return false;
14999   }
15000 
15001   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
15002                    ? CombinerGlobalAA
15003                    : DAG.getSubtarget().useAA();
15004 #ifndef NDEBUG
15005   if (CombinerAAOnlyFunc.getNumOccurrences() &&
15006       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
15007     UseAA = false;
15008 #endif
15009   if (UseAA &&
15010       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
15011     // Use alias analysis information.
15012     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
15013                                  Op1->getSrcValueOffset());
15014     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
15015         Op0->getSrcValueOffset() - MinOffset;
15016     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
15017         Op1->getSrcValueOffset() - MinOffset;
15018     AliasResult AAResult =
15019         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
15020                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
15021                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
15022                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
15023     if (AAResult == NoAlias)
15024       return false;
15025   }
15026 
15027   // Otherwise we have to assume they alias.
15028   return true;
15029 }
15030 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node. Non-volatile loads can be freely
  // reordered with each other, so a load never "aliases" another load here.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  // Depth is a total budget on the number of chain nodes walked past (it is
  // only ever incremented), bounding compile time on deep/wide chains.
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Budget exhausted: conservatively fall back to the original chain as
      // the sole alias.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here. Two non-volatile loads never
      // constrain each other, so only query isAlias otherwise.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to expand cheaply; treat the whole token factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
15111 
15112 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
15113 /// (aliasing node.)
15114 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
15115   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
15116 
15117   // Accumulate all the aliases to this node.
15118   GatherAllAliases(N, OldChain, Aliases);
15119 
15120   // If no operands then chain to entry token.
15121   if (Aliases.size() == 0)
15122     return DAG.getEntryNode();
15123 
15124   // If a single operand then chain to it.  We don't need to revisit it.
15125   if (Aliases.size() == 1)
15126     return Aliases[0];
15127 
15128   // Construct a custom tailored token factor.
15129   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
15130 }
15131 
/// Walk up the chain from \p St collecting stores with the same base pointer,
/// and give each of them (including St) a better chain via FindBetterChain.
/// Returns true only if St's own chain operand was replaced.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);

  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.isUndef())
    return false;

  // Collects St plus every same-base store reachable by walking its chain.
  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Volatile or indexed accesses can't be safely reordered; stop here.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);

    // Check that the base pointer is the same as the original one.
    if (!Ptr.equalBaseIndex(BasePtr))
      break;

    // Find the next memory operand in the chain. If the next operand in the
    // chain is a store then move up and continue the scan with the next
    // memory operand. If the next operand is a load save it and use alias
    // information to check if it interferes with anything.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          // Unsafe store terminates the whole outer walk.
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        // Skip past loads; keep looking up their chain for the next store.
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Anything else ends the walk.
        Index = nullptr;
        break;
      }
    }
  }

  bool MadeChangeToSt = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  // Compute a better chain for each collected store, but defer the actual
  // replacement.
  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      if (ChainedStore == St)
        MadeChangeToSt = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChangeToSt;
}
15213 
15214 /// This is the entry point for the file.
15215 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
15216                            CodeGenOpt::Level OptLevel) {
15217   /// This is the main entry point to this class.
15218   DAGCombiner(*this, AA, OptLevel).Run(Level);
15219 }
15220