1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Target/TargetLowering.h"
39 #include "llvm/Target/TargetOptions.h"
40 #include "llvm/Target/TargetRegisterInfo.h"
41 #include "llvm/Target/TargetSubtargetInfo.h"
42 #include <algorithm>
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "dagcombine"
46 
47 STATISTIC(NodesCombined   , "Number of dag nodes combined");
48 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
49 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
50 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
51 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
52 STATISTIC(SlicedLoads, "Number of load sliced");
53 
54 namespace {
55   static cl::opt<bool>
56     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
57                cl::desc("Enable DAG combiner's use of IR alias analysis"));
58 
59   static cl::opt<bool>
60     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
61                cl::desc("Enable DAG combiner's use of TBAA"));
62 
63 #ifndef NDEBUG
64   static cl::opt<std::string>
65     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
66                cl::desc("Only use DAG-combiner alias analysis in this"
67                         " function"));
68 #endif
69 
70   /// Hidden option to stress test load slicing, i.e., when this option
71   /// is enabled, load slicing bypasses most of its profitability guards.
72   static cl::opt<bool>
73   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
74                     cl::desc("Bypass the profitability model of load "
75                              "slicing"),
76                     cl::init(false));
77 
78   static cl::opt<bool>
79     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
80                       cl::desc("DAG combiner may split indexing from loads"));
81 
82 //------------------------------ DAGCombiner ---------------------------------//
83 
84   class DAGCombiner {
85     SelectionDAG &DAG;
86     const TargetLowering &TLI;
87     CombineLevel Level;
88     CodeGenOpt::Level OptLevel;
89     bool LegalOperations;
90     bool LegalTypes;
91     bool ForCodeSize;
92 
93     /// \brief Worklist of all of the nodes that need to be simplified.
94     ///
95     /// This must behave as a stack -- new nodes to process are pushed onto the
96     /// back and when processing we pop off of the back.
97     ///
98     /// The worklist will not contain duplicates but may contain null entries
99     /// due to nodes being deleted from the underlying DAG.
100     SmallVector<SDNode *, 64> Worklist;
101 
102     /// \brief Mapping from an SDNode to its position on the worklist.
103     ///
104     /// This is used to find and remove nodes from the worklist (by nulling
105     /// them) when they are deleted from the underlying DAG. It relies on
106     /// stable indices of nodes within the worklist.
107     DenseMap<SDNode *, unsigned> WorklistMap;
108 
109     /// \brief Set of nodes which have been combined (at least once).
110     ///
111     /// This is used to allow us to reliably add any operands of a DAG node
112     /// which have not yet been combined to the worklist.
113     SmallPtrSet<SDNode *, 32> CombinedNodes;
114 
115     // AA - Used for DAG load/store alias analysis.
116     AliasAnalysis &AA;
117 
118     /// When an instruction is simplified, add all users of the instruction to
119     /// the work lists because they might get more simplified now.
120     void AddUsersToWorklist(SDNode *N) {
121       for (SDNode *Node : N->uses())
122         AddToWorklist(Node);
123     }
124 
125     /// Call the node-specific routine that folds each particular type of node.
126     SDValue visit(SDNode *N);
127 
128   public:
129     /// Add to the worklist making sure its instance is at the back (next to be
130     /// processed.)
131     void AddToWorklist(SDNode *N) {
132       assert(N->getOpcode() != ISD::DELETED_NODE &&
133              "Deleted Node added to Worklist");
134 
135       // Skip handle nodes as they can't usefully be combined and confuse the
136       // zero-use deletion strategy.
137       if (N->getOpcode() == ISD::HANDLENODE)
138         return;
139 
140       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
141         Worklist.push_back(N);
142     }
143 
144     /// Remove all instances of N from the worklist.
145     void removeFromWorklist(SDNode *N) {
146       CombinedNodes.erase(N);
147 
148       auto It = WorklistMap.find(N);
149       if (It == WorklistMap.end())
150         return; // Not in the worklist.
151 
152       // Null out the entry rather than erasing it to avoid a linear operation.
153       Worklist[It->second] = nullptr;
154       WorklistMap.erase(It);
155     }
156 
157     void deleteAndRecombine(SDNode *N);
158     bool recursivelyDeleteUnusedNodes(SDNode *N);
159 
160     /// Replaces all uses of the results of one DAG node with new values.
161     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
162                       bool AddTo = true);
163 
164     /// Replaces all uses of the results of one DAG node with new values.
165     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
166       return CombineTo(N, &Res, 1, AddTo);
167     }
168 
169     /// Replaces all uses of the results of one DAG node with new values.
170     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
171                       bool AddTo = true) {
172       SDValue To[] = { Res0, Res1 };
173       return CombineTo(N, To, 2, AddTo);
174     }
175 
176     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
177 
178   private:
179     unsigned MaximumLegalStoreInBits;
180 
181     /// Check the specified integer node value to see if it can be simplified or
182     /// if things it uses can be simplified by bit propagation.
183     /// If so, return true.
184     bool SimplifyDemandedBits(SDValue Op) {
185       unsigned BitWidth = Op.getScalarValueSizeInBits();
186       APInt Demanded = APInt::getAllOnesValue(BitWidth);
187       return SimplifyDemandedBits(Op, Demanded);
188     }
189 
190     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
191 
192     bool CombineToPreIndexedLoadStore(SDNode *N);
193     bool CombineToPostIndexedLoadStore(SDNode *N);
194     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
195     bool SliceUpLoad(SDNode *N);
196 
197     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
198     ///   load.
199     ///
200     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
201     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
202     /// \param EltNo index of the vector element to load.
203     /// \param OriginalLoad load that EVE came from to be replaced.
204     /// \returns EVE on success SDValue() on failure.
205     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
206         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
207     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
208     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
209     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
210     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
211     SDValue PromoteIntBinOp(SDValue Op);
212     SDValue PromoteIntShiftOp(SDValue Op);
213     SDValue PromoteExtend(SDValue Op);
214     bool PromoteLoad(SDValue Op);
215 
216     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
217                          SDValue ExtLoad, const SDLoc &DL,
218                          ISD::NodeType ExtType);
219 
220     /// Call the node-specific routine that knows how to fold each
221     /// particular type of node. If that doesn't do anything, try the
222     /// target-specific DAG combines.
223     SDValue combine(SDNode *N);
224 
225     // Visitation implementation - Implement dag node combining for different
226     // node types.  The semantics are as follows:
227     // Return Value:
228     //   SDValue.getNode() == 0 - No change was made
229     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
230     //   otherwise              - N should be replaced by the returned Operand.
231     //
232     SDValue visitTokenFactor(SDNode *N);
233     SDValue visitMERGE_VALUES(SDNode *N);
234     SDValue visitADD(SDNode *N);
235     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
236     SDValue visitSUB(SDNode *N);
237     SDValue visitADDC(SDNode *N);
238     SDValue visitUADDO(SDNode *N);
239     SDValue visitSUBC(SDNode *N);
240     SDValue visitUSUBO(SDNode *N);
241     SDValue visitADDE(SDNode *N);
242     SDValue visitSUBE(SDNode *N);
243     SDValue visitMUL(SDNode *N);
244     SDValue useDivRem(SDNode *N);
245     SDValue visitSDIV(SDNode *N);
246     SDValue visitUDIV(SDNode *N);
247     SDValue visitREM(SDNode *N);
248     SDValue visitMULHU(SDNode *N);
249     SDValue visitMULHS(SDNode *N);
250     SDValue visitSMUL_LOHI(SDNode *N);
251     SDValue visitUMUL_LOHI(SDNode *N);
252     SDValue visitSMULO(SDNode *N);
253     SDValue visitUMULO(SDNode *N);
254     SDValue visitIMINMAX(SDNode *N);
255     SDValue visitAND(SDNode *N);
256     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
257     SDValue visitOR(SDNode *N);
258     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
259     SDValue visitXOR(SDNode *N);
260     SDValue SimplifyVBinOp(SDNode *N);
261     SDValue visitSHL(SDNode *N);
262     SDValue visitSRA(SDNode *N);
263     SDValue visitSRL(SDNode *N);
264     SDValue visitRotate(SDNode *N);
265     SDValue visitABS(SDNode *N);
266     SDValue visitBSWAP(SDNode *N);
267     SDValue visitBITREVERSE(SDNode *N);
268     SDValue visitCTLZ(SDNode *N);
269     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
270     SDValue visitCTTZ(SDNode *N);
271     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
272     SDValue visitCTPOP(SDNode *N);
273     SDValue visitSELECT(SDNode *N);
274     SDValue visitVSELECT(SDNode *N);
275     SDValue visitSELECT_CC(SDNode *N);
276     SDValue visitSETCC(SDNode *N);
277     SDValue visitSETCCE(SDNode *N);
278     SDValue visitSIGN_EXTEND(SDNode *N);
279     SDValue visitZERO_EXTEND(SDNode *N);
280     SDValue visitANY_EXTEND(SDNode *N);
281     SDValue visitAssertZext(SDNode *N);
282     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
283     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
284     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
285     SDValue visitTRUNCATE(SDNode *N);
286     SDValue visitBITCAST(SDNode *N);
287     SDValue visitBUILD_PAIR(SDNode *N);
288     SDValue visitFADD(SDNode *N);
289     SDValue visitFSUB(SDNode *N);
290     SDValue visitFMUL(SDNode *N);
291     SDValue visitFMA(SDNode *N);
292     SDValue visitFDIV(SDNode *N);
293     SDValue visitFREM(SDNode *N);
294     SDValue visitFSQRT(SDNode *N);
295     SDValue visitFCOPYSIGN(SDNode *N);
296     SDValue visitSINT_TO_FP(SDNode *N);
297     SDValue visitUINT_TO_FP(SDNode *N);
298     SDValue visitFP_TO_SINT(SDNode *N);
299     SDValue visitFP_TO_UINT(SDNode *N);
300     SDValue visitFP_ROUND(SDNode *N);
301     SDValue visitFP_ROUND_INREG(SDNode *N);
302     SDValue visitFP_EXTEND(SDNode *N);
303     SDValue visitFNEG(SDNode *N);
304     SDValue visitFABS(SDNode *N);
305     SDValue visitFCEIL(SDNode *N);
306     SDValue visitFTRUNC(SDNode *N);
307     SDValue visitFFLOOR(SDNode *N);
308     SDValue visitFMINNUM(SDNode *N);
309     SDValue visitFMAXNUM(SDNode *N);
310     SDValue visitBRCOND(SDNode *N);
311     SDValue visitBR_CC(SDNode *N);
312     SDValue visitLOAD(SDNode *N);
313 
314     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
315     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
316 
317     SDValue visitSTORE(SDNode *N);
318     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
319     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
320     SDValue visitBUILD_VECTOR(SDNode *N);
321     SDValue visitCONCAT_VECTORS(SDNode *N);
322     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
323     SDValue visitVECTOR_SHUFFLE(SDNode *N);
324     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
325     SDValue visitINSERT_SUBVECTOR(SDNode *N);
326     SDValue visitMLOAD(SDNode *N);
327     SDValue visitMSTORE(SDNode *N);
328     SDValue visitMGATHER(SDNode *N);
329     SDValue visitMSCATTER(SDNode *N);
330     SDValue visitFP_TO_FP16(SDNode *N);
331     SDValue visitFP16_TO_FP(SDNode *N);
332 
333     SDValue visitFADDForFMACombine(SDNode *N);
334     SDValue visitFSUBForFMACombine(SDNode *N);
335     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
336 
337     SDValue XformToShuffleWithZero(SDNode *N);
338     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
339                            SDValue RHS);
340 
341     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
342 
343     SDValue foldSelectOfConstants(SDNode *N);
344     SDValue foldBinOpIntoSelect(SDNode *BO);
345     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
346     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
347     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
348     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
349                              SDValue N2, SDValue N3, ISD::CondCode CC,
350                              bool NotExtCompare = false);
351     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
352                                    SDValue N2, SDValue N3, ISD::CondCode CC);
353     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
354                           const SDLoc &DL, bool foldBooleans = true);
355 
356     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
357                            SDValue &CC) const;
358     bool isOneUseSetCC(SDValue N) const;
359 
360     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
361                                          unsigned HiOp);
362     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
363     SDValue CombineExtLoad(SDNode *N);
364     SDValue combineRepeatedFPDivisors(SDNode *N);
365     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
366     SDValue BuildSDIV(SDNode *N);
367     SDValue BuildSDIVPow2(SDNode *N);
368     SDValue BuildUDIV(SDNode *N);
369     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
370     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
371     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
372     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
373     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
374     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
375                                 SDNodeFlags *Flags, bool Reciprocal);
376     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
377                                 SDNodeFlags *Flags, bool Reciprocal);
378     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
379                                bool DemandHighBits = true);
380     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
381     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
382                               SDValue InnerPos, SDValue InnerNeg,
383                               unsigned PosOpcode, unsigned NegOpcode,
384                               const SDLoc &DL);
385     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
386     SDValue MatchLoadCombine(SDNode *N);
387     SDValue ReduceLoadWidth(SDNode *N);
388     SDValue ReduceLoadOpStoreWidth(SDNode *N);
389     SDValue splitMergedValStore(StoreSDNode *ST);
390     SDValue TransformFPLoadStorePair(SDNode *N);
391     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
392     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
393     SDValue reduceBuildVecToShuffle(SDNode *N);
394     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
395                                   ArrayRef<int> VectorMask, SDValue VecIn1,
396                                   SDValue VecIn2, unsigned LeftIdx);
397 
398     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
399 
400     /// Walk up chain skipping non-aliasing memory nodes,
401     /// looking for aliasing nodes and adding them to the Aliases vector.
402     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
403                           SmallVectorImpl<SDValue> &Aliases);
404 
405     /// Return true if there is any possibility that the two addresses overlap.
406     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
407 
408     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
409     /// chain (aliasing node.)
410     SDValue FindBetterChain(SDNode *N, SDValue Chain);
411 
412     /// Try to replace a store and any possibly adjacent stores on
413     /// consecutive chains with better chains. Return true only if St is
414     /// replaced.
415     ///
416     /// Notice that other chains may still be replaced even if the function
417     /// returns false.
418     bool findBetterNeighborChains(StoreSDNode *St);
419 
420     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
421     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
422 
423     /// Holds a pointer to an LSBaseSDNode as well as information on where it
424     /// is located in a sequence of memory operations connected by a chain.
425     struct MemOpLink {
426       MemOpLink(LSBaseSDNode *N, int64_t Offset)
427           : MemNode(N), OffsetFromBase(Offset) {}
428       // Ptr to the mem node.
429       LSBaseSDNode *MemNode;
430       // Offset from the base ptr.
431       int64_t OffsetFromBase;
432     };
433 
434     /// This is a helper function for visitMUL to check the profitability
435     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
436     /// MulNode is the original multiply, AddNode is (add x, c1),
437     /// and ConstNode is c2.
438     bool isMulAddWithConstProfitable(SDNode *MulNode,
439                                      SDValue &AddNode,
440                                      SDValue &ConstNode);
441 
442 
443     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
444     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
445     /// the type of the loaded value to be extended.  LoadedVT returns the type
446     /// of the original loaded value.  NarrowLoad returns whether the load would
447     /// need to be narrowed in order to match.
448     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
449                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
450                           bool &NarrowLoad);
451 
452     /// This is a helper function for MergeConsecutiveStores. When the source
453     /// elements of the consecutive stores are all constants or all extracted
454     /// vector elements, try to merge them into one larger store.
455     /// \return True if a merged store was created.
456     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
457                                          EVT MemVT, unsigned NumStores,
458                                          bool IsConstantSrc, bool UseVector);
459 
460     /// This is a helper function for MergeConsecutiveStores.
461     /// Stores that may be merged are placed in StoreNodes.
462     void getStoreMergeCandidates(StoreSDNode *St,
463                                  SmallVectorImpl<MemOpLink> &StoreNodes);
464 
465     /// Helper function for MergeConsecutiveStores. Checks if
466     /// Candidate stores have indirect dependency through their
467     /// operands. \return True if safe to merge
468     bool checkMergeStoreCandidatesForDependencies(
469         SmallVectorImpl<MemOpLink> &StoreNodes);
470 
471     /// Merge consecutive store operations into a wide store.
472     /// This optimization uses wide integers or vectors when possible.
473     /// \return number of stores that were merged into a merged store (the
474     /// affected nodes are stored as a prefix in \p StoreNodes).
475     bool MergeConsecutiveStores(StoreSDNode *N);
476 
477     /// \brief Try to transform a truncation where C is a constant:
478     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
479     ///
480     /// \p N needs to be a truncation and its first operand an AND. Other
481     /// requirements are checked by the function (e.g. that trunc is
482     /// single-use) and if missed an empty SDValue is returned.
483     SDValue distributeTruncateThroughAnd(SDNode *N);
484 
485   public:
486     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
487         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
488           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
489       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
490 
491       MaximumLegalStoreInBits = 0;
492       for (MVT VT : MVT::all_valuetypes())
493         if (EVT(VT).isSimple() && VT != MVT::Other &&
494             TLI.isTypeLegal(EVT(VT)) &&
495             VT.getSizeInBits() >= MaximumLegalStoreInBits)
496           MaximumLegalStoreInBits = VT.getSizeInBits();
497     }
498 
499     /// Runs the dag combiner on all nodes in the work list
500     void Run(CombineLevel AtLevel);
501 
502     SelectionDAG &getDAG() const { return DAG; }
503 
504     /// Returns a type large enough to hold any valid shift amount - before type
505     /// legalization these can be huge.
506     EVT getShiftAmountTy(EVT LHSTy) {
507       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
508       if (LHSTy.isVector())
509         return LHSTy;
510       auto &DL = DAG.getDataLayout();
511       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
512                         : TLI.getPointerTy(DL);
513     }
514 
515     /// This method returns true if we are running before type legalization or
516     /// if the specified VT is legal.
517     bool isTypeLegal(const EVT &VT) {
518       if (!LegalTypes) return true;
519       return TLI.isTypeLegal(VT);
520     }
521 
522     /// Convenience wrapper around TargetLowering::getSetCCResultType
523     EVT getSetCCResultType(EVT VT) const {
524       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
525     }
526   };
527 }
528 
529 
530 namespace {
531 /// This class is a DAGUpdateListener that removes any deleted
532 /// nodes from the worklist.
533 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
534   DAGCombiner &DC;
535 public:
536   explicit WorklistRemover(DAGCombiner &dc)
537     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
538 
539   void NodeDeleted(SDNode *N, SDNode *E) override {
540     DC.removeFromWorklist(N);
541   }
542 };
543 }
544 
545 //===----------------------------------------------------------------------===//
546 //  TargetLowering::DAGCombinerInfo implementation
547 //===----------------------------------------------------------------------===//
548 
549 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
550   ((DAGCombiner*)DC)->AddToWorklist(N);
551 }
552 
553 SDValue TargetLowering::DAGCombinerInfo::
554 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
555   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
556 }
557 
558 SDValue TargetLowering::DAGCombinerInfo::
559 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
560   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
561 }
562 
563 
564 SDValue TargetLowering::DAGCombinerInfo::
565 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
566   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
567 }
568 
569 void TargetLowering::DAGCombinerInfo::
570 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
571   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
572 }
573 
574 //===----------------------------------------------------------------------===//
575 // Helper Functions
576 //===----------------------------------------------------------------------===//
577 
578 void DAGCombiner::deleteAndRecombine(SDNode *N) {
579   removeFromWorklist(N);
580 
581   // If the operands of this node are only used by the node, they will now be
582   // dead. Make sure to re-visit them and recursively delete dead nodes.
583   for (const SDValue &Op : N->ops())
584     // For an operand generating multiple values, one of the values may
585     // become dead allowing further simplification (e.g. split index
586     // arithmetic from an indexed load).
587     if (Op->hasOneUse() || Op->getNumValues() > 1)
588       AddToWorklist(Op.getNode());
589 
590   DAG.DeleteNode(N);
591 }
592 
593 /// Return 1 if we can compute the negated form of the specified expression for
594 /// the same cost as the expression itself, or 2 if we can compute the negated
595 /// form more cheaply than the expression itself.
596 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
597                                const TargetLowering &TLI,
598                                const TargetOptions *Options,
599                                unsigned Depth = 0) {
600   // fneg is removable even if it has multiple uses.
601   if (Op.getOpcode() == ISD::FNEG) return 2;
602 
603   // Don't allow anything with multiple uses.
604   if (!Op.hasOneUse()) return 0;
605 
606   // Don't recurse exponentially.
607   if (Depth > 6) return 0;
608 
609   switch (Op.getOpcode()) {
610   default: return false;
611   case ISD::ConstantFP: {
612     if (!LegalOperations)
613       return 1;
614 
615     // Don't invert constant FP values after legalization unless the target says
616     // the negated constant is legal.
617     EVT VT = Op.getValueType();
618     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
619       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
620   }
621   case ISD::FADD:
622     // FIXME: determine better conditions for this xform.
623     if (!Options->UnsafeFPMath) return 0;
624 
625     // After operation legalization, it might not be legal to create new FSUBs.
626     if (LegalOperations &&
627         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
628       return 0;
629 
630     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
631     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
632                                     Options, Depth + 1))
633       return V;
634     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
635     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
636                               Depth + 1);
637   case ISD::FSUB:
638     // We can't turn -(A-B) into B-A when we honor signed zeros.
639     if (!Options->NoSignedZerosFPMath &&
640         !Op.getNode()->getFlags()->hasNoSignedZeros())
641       return 0;
642 
643     // fold (fneg (fsub A, B)) -> (fsub B, A)
644     return 1;
645 
646   case ISD::FMUL:
647   case ISD::FDIV:
648     if (Options->HonorSignDependentRoundingFPMath()) return 0;
649 
650     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
651     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
652                                     Options, Depth + 1))
653       return V;
654 
655     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
656                               Depth + 1);
657 
658   case ISD::FP_EXTEND:
659   case ISD::FP_ROUND:
660   case ISD::FSIN:
661     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
662                               Depth + 1);
663   }
664 }
665 
666 /// If isNegatibleForFree returns true, return the newly negated expression.
667 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
668                                     bool LegalOperations, unsigned Depth = 0) {
669   const TargetOptions &Options = DAG.getTarget().Options;
670   // fneg is removable even if it has multiple uses.
671   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
672 
673   // Don't allow anything with multiple uses.
674   assert(Op.hasOneUse() && "Unknown reuse!");
675 
676   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
677 
678   const SDNodeFlags *Flags = Op.getNode()->getFlags();
679 
680   switch (Op.getOpcode()) {
681   default: llvm_unreachable("Unknown code");
682   case ISD::ConstantFP: {
683     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
684     V.changeSign();
685     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
686   }
687   case ISD::FADD:
688     // FIXME: determine better conditions for this xform.
689     assert(Options.UnsafeFPMath);
690 
691     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
692     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
693                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
694       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
695                          GetNegatedExpression(Op.getOperand(0), DAG,
696                                               LegalOperations, Depth+1),
697                          Op.getOperand(1), Flags);
698     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
699     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
700                        GetNegatedExpression(Op.getOperand(1), DAG,
701                                             LegalOperations, Depth+1),
702                        Op.getOperand(0), Flags);
703   case ISD::FSUB:
704     // fold (fneg (fsub 0, B)) -> B
705     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
706       if (N0CFP->isZero())
707         return Op.getOperand(1);
708 
709     // fold (fneg (fsub A, B)) -> (fsub B, A)
710     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
711                        Op.getOperand(1), Op.getOperand(0), Flags);
712 
713   case ISD::FMUL:
714   case ISD::FDIV:
715     assert(!Options.HonorSignDependentRoundingFPMath());
716 
717     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
718     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
719                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
720       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
721                          GetNegatedExpression(Op.getOperand(0), DAG,
722                                               LegalOperations, Depth+1),
723                          Op.getOperand(1), Flags);
724 
725     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
726     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
727                        Op.getOperand(0),
728                        GetNegatedExpression(Op.getOperand(1), DAG,
729                                             LegalOperations, Depth+1), Flags);
730 
731   case ISD::FP_EXTEND:
732   case ISD::FSIN:
733     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
734                        GetNegatedExpression(Op.getOperand(0), DAG,
735                                             LegalOperations, Depth+1));
736   case ISD::FP_ROUND:
737       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
738                          GetNegatedExpression(Op.getOperand(0), DAG,
739                                               LegalOperations, Depth+1),
740                          Op.getOperand(1));
741   }
742 }
743 
744 // APInts must be the same size for most operations, this helper
745 // function zero extends the shorter of the pair so that they match.
746 // We provide an Offset so that we can create bitwidths that won't overflow.
747 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
748   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
749   LHS = LHS.zextOrSelf(Bits);
750   RHS = RHS.zextOrSelf(Bits);
751 }
752 
753 // Return true if this node is a setcc, or is a select_cc
754 // that selects between the target values used for true and false, making it
755 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
756 // the appropriate nodes based on the type of node we are checking. This
757 // simplifies life a bit for the callers.
758 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
759                                     SDValue &CC) const {
760   if (N.getOpcode() == ISD::SETCC) {
761     LHS = N.getOperand(0);
762     RHS = N.getOperand(1);
763     CC  = N.getOperand(2);
764     return true;
765   }
766 
767   if (N.getOpcode() != ISD::SELECT_CC ||
768       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
769       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
770     return false;
771 
772   if (TLI.getBooleanContents(N.getValueType()) ==
773       TargetLowering::UndefinedBooleanContent)
774     return false;
775 
776   LHS = N.getOperand(0);
777   RHS = N.getOperand(1);
778   CC  = N.getOperand(4);
779   return true;
780 }
781 
782 /// Return true if this is a SetCC-equivalent operation with only one use.
783 /// If this is true, it allows the users to invert the operation for free when
784 /// it is profitable to do so.
785 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
786   SDValue N0, N1, N2;
787   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
788     return true;
789   return false;
790 }
791 
792 // \brief Returns the SDNode if it is a constant float BuildVector
793 // or constant float.
794 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
795   if (isa<ConstantFPSDNode>(N))
796     return N.getNode();
797   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
798     return N.getNode();
799   return nullptr;
800 }
801 
802 // Determines if it is a constant integer or a build vector of constant
803 // integers (and undefs).
804 // Do not permit build vector implicit truncation.
805 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
806   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
807     return !(Const->isOpaque() && NoOpaques);
808   if (N.getOpcode() != ISD::BUILD_VECTOR)
809     return false;
810   unsigned BitWidth = N.getScalarValueSizeInBits();
811   for (const SDValue &Op : N->op_values()) {
812     if (Op.isUndef())
813       continue;
814     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
815     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
816         (Const->isOpaque() && NoOpaques))
817       return false;
818   }
819   return true;
820 }
821 
822 // Determines if it is a constant null integer or a splatted vector of a
823 // constant null integer (with no undefs).
824 // Build vector implicit truncation is not an issue for null values.
825 static bool isNullConstantOrNullSplatConstant(SDValue N) {
826   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
827     return Splat->isNullValue();
828   return false;
829 }
830 
831 // Determines if it is a constant integer of one or a splatted vector of a
832 // constant integer of one (with no undefs).
833 // Do not permit build vector implicit truncation.
834 static bool isOneConstantOrOneSplatConstant(SDValue N) {
835   unsigned BitWidth = N.getScalarValueSizeInBits();
836   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
837     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
838   return false;
839 }
840 
841 // Determines if it is a constant integer of all ones or a splatted vector of a
842 // constant integer of all ones (with no undefs).
843 // Do not permit build vector implicit truncation.
844 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
845   unsigned BitWidth = N.getScalarValueSizeInBits();
846   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
847     return Splat->isAllOnesValue() &&
848            Splat->getAPIntValue().getBitWidth() == BitWidth;
849   return false;
850 }
851 
852 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
853 // undef's.
854 static bool isAnyConstantBuildVector(const SDNode *N) {
855   return ISD::isBuildVectorOfConstantSDNodes(N) ||
856          ISD::isBuildVectorOfConstantFPSDNodes(N);
857 }
858 
859 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
860                                     SDValue N1) {
861   EVT VT = N0.getValueType();
862   if (N0.getOpcode() == Opc) {
863     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
864       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
865         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
866         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
867           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
868         return SDValue();
869       }
870       if (N0.hasOneUse()) {
871         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
872         // use
873         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
874         if (!OpNode.getNode())
875           return SDValue();
876         AddToWorklist(OpNode.getNode());
877         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
878       }
879     }
880   }
881 
882   if (N1.getOpcode() == Opc) {
883     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
884       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
885         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
886         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
887           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
888         return SDValue();
889       }
890       if (N1.hasOneUse()) {
891         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
892         // use
893         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
894         if (!OpNode.getNode())
895           return SDValue();
896         AddToWorklist(OpNode.getNode());
897         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
898       }
899     }
900   }
901 
902   return SDValue();
903 }
904 
905 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
906                                bool AddTo) {
907   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
908   ++NodesCombined;
909   DEBUG(dbgs() << "\nReplacing.1 ";
910         N->dump(&DAG);
911         dbgs() << "\nWith: ";
912         To[0].getNode()->dump(&DAG);
913         dbgs() << " and " << NumTo-1 << " other values\n");
914   for (unsigned i = 0, e = NumTo; i != e; ++i)
915     assert((!To[i].getNode() ||
916             N->getValueType(i) == To[i].getValueType()) &&
917            "Cannot combine value to value of different type!");
918 
919   WorklistRemover DeadNodes(*this);
920   DAG.ReplaceAllUsesWith(N, To);
921   if (AddTo) {
922     // Push the new nodes and any users onto the worklist
923     for (unsigned i = 0, e = NumTo; i != e; ++i) {
924       if (To[i].getNode()) {
925         AddToWorklist(To[i].getNode());
926         AddUsersToWorklist(To[i].getNode());
927       }
928     }
929   }
930 
931   // Finally, if the node is now dead, remove it from the graph.  The node
932   // may not be dead if the replacement process recursively simplified to
933   // something else needing this node.
934   if (N->use_empty())
935     deleteAndRecombine(N);
936   return SDValue(N, 0);
937 }
938 
939 void DAGCombiner::
940 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
941   // Replace all uses.  If any nodes become isomorphic to other nodes and
942   // are deleted, make sure to remove them from our worklist.
943   WorklistRemover DeadNodes(*this);
944   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
945 
946   // Push the new node and any (possibly new) users onto the worklist.
947   AddToWorklist(TLO.New.getNode());
948   AddUsersToWorklist(TLO.New.getNode());
949 
950   // Finally, if the node is now dead, remove it from the graph.  The node
951   // may not be dead if the replacement process recursively simplified to
952   // something else needing this node.
953   if (TLO.Old.getNode()->use_empty())
954     deleteAndRecombine(TLO.Old.getNode());
955 }
956 
957 /// Check the specified integer node value to see if it can be simplified or if
958 /// things it uses can be simplified by bit propagation. If so, return true.
959 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
960   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
961   APInt KnownZero, KnownOne;
962   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
963     return false;
964 
965   // Revisit the node.
966   AddToWorklist(Op.getNode());
967 
968   // Replace the old value with the new one.
969   ++NodesCombined;
970   DEBUG(dbgs() << "\nReplacing.2 ";
971         TLO.Old.getNode()->dump(&DAG);
972         dbgs() << "\nWith: ";
973         TLO.New.getNode()->dump(&DAG);
974         dbgs() << '\n');
975 
976   CommitTargetLoweringOpt(TLO);
977   return true;
978 }
979 
980 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
981   SDLoc DL(Load);
982   EVT VT = Load->getValueType(0);
983   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
984 
985   DEBUG(dbgs() << "\nReplacing.9 ";
986         Load->dump(&DAG);
987         dbgs() << "\nWith: ";
988         Trunc.getNode()->dump(&DAG);
989         dbgs() << '\n');
990   WorklistRemover DeadNodes(*this);
991   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
992   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
993   deleteAndRecombine(Load);
994   AddToWorklist(Trunc.getNode());
995 }
996 
997 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
998   Replace = false;
999   SDLoc DL(Op);
1000   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1001     LoadSDNode *LD = cast<LoadSDNode>(Op);
1002     EVT MemVT = LD->getMemoryVT();
1003     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1004       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1005                                                        : ISD::EXTLOAD)
1006       : LD->getExtensionType();
1007     Replace = true;
1008     return DAG.getExtLoad(ExtType, DL, PVT,
1009                           LD->getChain(), LD->getBasePtr(),
1010                           MemVT, LD->getMemOperand());
1011   }
1012 
1013   unsigned Opc = Op.getOpcode();
1014   switch (Opc) {
1015   default: break;
1016   case ISD::AssertSext:
1017     return DAG.getNode(ISD::AssertSext, DL, PVT,
1018                        SExtPromoteOperand(Op.getOperand(0), PVT),
1019                        Op.getOperand(1));
1020   case ISD::AssertZext:
1021     return DAG.getNode(ISD::AssertZext, DL, PVT,
1022                        ZExtPromoteOperand(Op.getOperand(0), PVT),
1023                        Op.getOperand(1));
1024   case ISD::Constant: {
1025     unsigned ExtOpc =
1026       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1027     return DAG.getNode(ExtOpc, DL, PVT, Op);
1028   }
1029   }
1030 
1031   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1032     return SDValue();
1033   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1034 }
1035 
1036 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1037   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1038     return SDValue();
1039   EVT OldVT = Op.getValueType();
1040   SDLoc DL(Op);
1041   bool Replace = false;
1042   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1043   if (!NewOp.getNode())
1044     return SDValue();
1045   AddToWorklist(NewOp.getNode());
1046 
1047   if (Replace)
1048     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1049   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1050                      DAG.getValueType(OldVT));
1051 }
1052 
1053 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1054   EVT OldVT = Op.getValueType();
1055   SDLoc DL(Op);
1056   bool Replace = false;
1057   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1058   if (!NewOp.getNode())
1059     return SDValue();
1060   AddToWorklist(NewOp.getNode());
1061 
1062   if (Replace)
1063     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1064   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1065 }
1066 
1067 /// Promote the specified integer binary operation if the target indicates it is
1068 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1069 /// i32 since i16 instructions are longer.
1070 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1071   if (!LegalOperations)
1072     return SDValue();
1073 
1074   EVT VT = Op.getValueType();
1075   if (VT.isVector() || !VT.isInteger())
1076     return SDValue();
1077 
1078   // If operation type is 'undesirable', e.g. i16 on x86, consider
1079   // promoting it.
1080   unsigned Opc = Op.getOpcode();
1081   if (TLI.isTypeDesirableForOp(Opc, VT))
1082     return SDValue();
1083 
1084   EVT PVT = VT;
1085   // Consult target whether it is a good idea to promote this operation and
1086   // what's the right type to promote it to.
1087   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1088     assert(PVT != VT && "Don't know what type to promote to!");
1089 
1090     bool Replace0 = false;
1091     SDValue N0 = Op.getOperand(0);
1092     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1093     if (!NN0.getNode())
1094       return SDValue();
1095 
1096     bool Replace1 = false;
1097     SDValue N1 = Op.getOperand(1);
1098     SDValue NN1;
1099     if (N0 == N1)
1100       NN1 = NN0;
1101     else {
1102       NN1 = PromoteOperand(N1, PVT, Replace1);
1103       if (!NN1.getNode())
1104         return SDValue();
1105     }
1106 
1107     AddToWorklist(NN0.getNode());
1108     if (NN1.getNode())
1109       AddToWorklist(NN1.getNode());
1110 
1111     if (Replace0)
1112       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1113     if (Replace1)
1114       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1115 
1116     DEBUG(dbgs() << "\nPromoting ";
1117           Op.getNode()->dump(&DAG));
1118     SDLoc DL(Op);
1119     return DAG.getNode(ISD::TRUNCATE, DL, VT,
1120                        DAG.getNode(Opc, DL, PVT, NN0, NN1));
1121   }
1122   return SDValue();
1123 }
1124 
1125 /// Promote the specified integer shift operation if the target indicates it is
1126 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1127 /// i32 since i16 instructions are longer.
1128 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1129   if (!LegalOperations)
1130     return SDValue();
1131 
1132   EVT VT = Op.getValueType();
1133   if (VT.isVector() || !VT.isInteger())
1134     return SDValue();
1135 
1136   // If operation type is 'undesirable', e.g. i16 on x86, consider
1137   // promoting it.
1138   unsigned Opc = Op.getOpcode();
1139   if (TLI.isTypeDesirableForOp(Opc, VT))
1140     return SDValue();
1141 
1142   EVT PVT = VT;
1143   // Consult target whether it is a good idea to promote this operation and
1144   // what's the right type to promote it to.
1145   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1146     assert(PVT != VT && "Don't know what type to promote to!");
1147 
1148     bool Replace = false;
1149     SDValue N0 = Op.getOperand(0);
1150     if (Opc == ISD::SRA)
1151       N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
1152     else if (Opc == ISD::SRL)
1153       N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
1154     else
1155       N0 = PromoteOperand(N0, PVT, Replace);
1156     if (!N0.getNode())
1157       return SDValue();
1158 
1159     AddToWorklist(N0.getNode());
1160     if (Replace)
1161       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1162 
1163     DEBUG(dbgs() << "\nPromoting ";
1164           Op.getNode()->dump(&DAG));
1165     SDLoc DL(Op);
1166     return DAG.getNode(ISD::TRUNCATE, DL, VT,
1167                        DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
1168   }
1169   return SDValue();
1170 }
1171 
1172 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1173   if (!LegalOperations)
1174     return SDValue();
1175 
1176   EVT VT = Op.getValueType();
1177   if (VT.isVector() || !VT.isInteger())
1178     return SDValue();
1179 
1180   // If operation type is 'undesirable', e.g. i16 on x86, consider
1181   // promoting it.
1182   unsigned Opc = Op.getOpcode();
1183   if (TLI.isTypeDesirableForOp(Opc, VT))
1184     return SDValue();
1185 
1186   EVT PVT = VT;
1187   // Consult target whether it is a good idea to promote this operation and
1188   // what's the right type to promote it to.
1189   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1190     assert(PVT != VT && "Don't know what type to promote to!");
1191     // fold (aext (aext x)) -> (aext x)
1192     // fold (aext (zext x)) -> (zext x)
1193     // fold (aext (sext x)) -> (sext x)
1194     DEBUG(dbgs() << "\nPromoting ";
1195           Op.getNode()->dump(&DAG));
1196     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1197   }
1198   return SDValue();
1199 }
1200 
1201 bool DAGCombiner::PromoteLoad(SDValue Op) {
1202   if (!LegalOperations)
1203     return false;
1204 
1205   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1206     return false;
1207 
1208   EVT VT = Op.getValueType();
1209   if (VT.isVector() || !VT.isInteger())
1210     return false;
1211 
1212   // If operation type is 'undesirable', e.g. i16 on x86, consider
1213   // promoting it.
1214   unsigned Opc = Op.getOpcode();
1215   if (TLI.isTypeDesirableForOp(Opc, VT))
1216     return false;
1217 
1218   EVT PVT = VT;
1219   // Consult target whether it is a good idea to promote this operation and
1220   // what's the right type to promote it to.
1221   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1222     assert(PVT != VT && "Don't know what type to promote to!");
1223 
1224     SDLoc DL(Op);
1225     SDNode *N = Op.getNode();
1226     LoadSDNode *LD = cast<LoadSDNode>(N);
1227     EVT MemVT = LD->getMemoryVT();
1228     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1229       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1230                                                        : ISD::EXTLOAD)
1231       : LD->getExtensionType();
1232     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1233                                    LD->getChain(), LD->getBasePtr(),
1234                                    MemVT, LD->getMemOperand());
1235     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1236 
1237     DEBUG(dbgs() << "\nPromoting ";
1238           N->dump(&DAG);
1239           dbgs() << "\nTo: ";
1240           Result.getNode()->dump(&DAG);
1241           dbgs() << '\n');
1242     WorklistRemover DeadNodes(*this);
1243     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1244     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1245     deleteAndRecombine(N);
1246     AddToWorklist(Result.getNode());
1247     return true;
1248   }
1249   return false;
1250 }
1251 
1252 /// \brief Recursively delete a node which has no uses and any operands for
1253 /// which it is the only use.
1254 ///
1255 /// Note that this both deletes the nodes and removes them from the worklist.
1256 /// It also adds any nodes who have had a user deleted to the worklist as they
1257 /// may now have only one use and subject to other combines.
1258 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1259   if (!N->use_empty())
1260     return false;
1261 
1262   SmallSetVector<SDNode *, 16> Nodes;
1263   Nodes.insert(N);
1264   do {
1265     N = Nodes.pop_back_val();
1266     if (!N)
1267       continue;
1268 
1269     if (N->use_empty()) {
1270       for (const SDValue &ChildN : N->op_values())
1271         Nodes.insert(ChildN.getNode());
1272 
1273       removeFromWorklist(N);
1274       DAG.DeleteNode(N);
1275     } else {
1276       AddToWorklist(N);
1277     }
1278   } while (!Nodes.empty());
1279   return true;
1280 }
1281 
1282 //===----------------------------------------------------------------------===//
1283 //  Main DAG Combiner implementation
1284 //===----------------------------------------------------------------------===//
1285 
1286 void DAGCombiner::Run(CombineLevel AtLevel) {
1287   // set the instance variables, so that the various visit routines may use it.
1288   Level = AtLevel;
1289   LegalOperations = Level >= AfterLegalizeVectorOps;
1290   LegalTypes = Level >= AfterLegalizeTypes;
1291 
1292   // Add all the dag nodes to the worklist.
1293   for (SDNode &Node : DAG.allnodes())
1294     AddToWorklist(&Node);
1295 
1296   // Create a dummy node (which is not added to allnodes), that adds a reference
1297   // to the root node, preventing it from being deleted, and tracking any
1298   // changes of the root.
1299   HandleSDNode Dummy(DAG.getRoot());
1300 
1301   // While the worklist isn't empty, find a node and try to combine it.
1302   while (!WorklistMap.empty()) {
1303     SDNode *N;
1304     // The Worklist holds the SDNodes in order, but it may contain null entries.
1305     do {
1306       N = Worklist.pop_back_val();
1307     } while (!N);
1308 
1309     bool GoodWorklistEntry = WorklistMap.erase(N);
1310     (void)GoodWorklistEntry;
1311     assert(GoodWorklistEntry &&
1312            "Found a worklist entry without a corresponding map entry!");
1313 
1314     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1315     // N is deleted from the DAG, since they too may now be dead or may have a
1316     // reduced number of uses, allowing other xforms.
1317     if (recursivelyDeleteUnusedNodes(N))
1318       continue;
1319 
1320     WorklistRemover DeadNodes(*this);
1321 
1322     // If this combine is running after legalizing the DAG, re-legalize any
1323     // nodes pulled off the worklist.
1324     if (Level == AfterLegalizeDAG) {
1325       SmallSetVector<SDNode *, 16> UpdatedNodes;
1326       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1327 
1328       for (SDNode *LN : UpdatedNodes) {
1329         AddToWorklist(LN);
1330         AddUsersToWorklist(LN);
1331       }
1332       if (!NIsValid)
1333         continue;
1334     }
1335 
1336     DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1337 
1338     // Add any operands of the new node which have not yet been combined to the
1339     // worklist as well. Because the worklist uniques things already, this
1340     // won't repeatedly process the same operand.
1341     CombinedNodes.insert(N);
1342     for (const SDValue &ChildN : N->op_values())
1343       if (!CombinedNodes.count(ChildN.getNode()))
1344         AddToWorklist(ChildN.getNode());
1345 
1346     SDValue RV = combine(N);
1347 
1348     if (!RV.getNode())
1349       continue;
1350 
1351     ++NodesCombined;
1352 
1353     // If we get back the same node we passed in, rather than a new node or
1354     // zero, we know that the node must have defined multiple values and
1355     // CombineTo was used.  Since CombineTo takes care of the worklist
1356     // mechanics for us, we have no work to do in this case.
1357     if (RV.getNode() == N)
1358       continue;
1359 
1360     assert(N->getOpcode() != ISD::DELETED_NODE &&
1361            RV.getOpcode() != ISD::DELETED_NODE &&
1362            "Node was deleted but visit returned new node!");
1363 
1364     DEBUG(dbgs() << " ... into: ";
1365           RV.getNode()->dump(&DAG));
1366 
1367     if (N->getNumValues() == RV.getNode()->getNumValues())
1368       DAG.ReplaceAllUsesWith(N, RV.getNode());
1369     else {
1370       assert(N->getValueType(0) == RV.getValueType() &&
1371              N->getNumValues() == 1 && "Type mismatch");
1372       DAG.ReplaceAllUsesWith(N, &RV);
1373     }
1374 
1375     // Push the new node and any users onto the worklist
1376     AddToWorklist(RV.getNode());
1377     AddUsersToWorklist(RV.getNode());
1378 
1379     // Finally, if the node is now dead, remove it from the graph.  The node
1380     // may not be dead if the replacement process recursively simplified to
1381     // something else needing this node. This will also take care of adding any
1382     // operands which have lost a user to the worklist.
1383     recursivelyDeleteUnusedNodes(N);
1384   }
1385 
1386   // If the root changed (e.g. it was a dead load, update the root).
1387   DAG.setRoot(Dummy.getValue());
1388   DAG.RemoveDeadNodes();
1389 }
1390 
1391 SDValue DAGCombiner::visit(SDNode *N) {
1392   switch (N->getOpcode()) {
1393   default: break;
1394   case ISD::TokenFactor:        return visitTokenFactor(N);
1395   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1396   case ISD::ADD:                return visitADD(N);
1397   case ISD::SUB:                return visitSUB(N);
1398   case ISD::ADDC:               return visitADDC(N);
1399   case ISD::UADDO:              return visitUADDO(N);
1400   case ISD::SUBC:               return visitSUBC(N);
1401   case ISD::USUBO:              return visitUSUBO(N);
1402   case ISD::ADDE:               return visitADDE(N);
1403   case ISD::SUBE:               return visitSUBE(N);
1404   case ISD::MUL:                return visitMUL(N);
1405   case ISD::SDIV:               return visitSDIV(N);
1406   case ISD::UDIV:               return visitUDIV(N);
1407   case ISD::SREM:
1408   case ISD::UREM:               return visitREM(N);
1409   case ISD::MULHU:              return visitMULHU(N);
1410   case ISD::MULHS:              return visitMULHS(N);
1411   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1412   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1413   case ISD::SMULO:              return visitSMULO(N);
1414   case ISD::UMULO:              return visitUMULO(N);
1415   case ISD::SMIN:
1416   case ISD::SMAX:
1417   case ISD::UMIN:
1418   case ISD::UMAX:               return visitIMINMAX(N);
1419   case ISD::AND:                return visitAND(N);
1420   case ISD::OR:                 return visitOR(N);
1421   case ISD::XOR:                return visitXOR(N);
1422   case ISD::SHL:                return visitSHL(N);
1423   case ISD::SRA:                return visitSRA(N);
1424   case ISD::SRL:                return visitSRL(N);
1425   case ISD::ROTR:
1426   case ISD::ROTL:               return visitRotate(N);
1427   case ISD::ABS:                return visitABS(N);
1428   case ISD::BSWAP:              return visitBSWAP(N);
1429   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1430   case ISD::CTLZ:               return visitCTLZ(N);
1431   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1432   case ISD::CTTZ:               return visitCTTZ(N);
1433   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1434   case ISD::CTPOP:              return visitCTPOP(N);
1435   case ISD::SELECT:             return visitSELECT(N);
1436   case ISD::VSELECT:            return visitVSELECT(N);
1437   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1438   case ISD::SETCC:              return visitSETCC(N);
1439   case ISD::SETCCE:             return visitSETCCE(N);
1440   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1441   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1442   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1443   case ISD::AssertZext:         return visitAssertZext(N);
1444   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1445   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1446   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1447   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1448   case ISD::BITCAST:            return visitBITCAST(N);
1449   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1450   case ISD::FADD:               return visitFADD(N);
1451   case ISD::FSUB:               return visitFSUB(N);
1452   case ISD::FMUL:               return visitFMUL(N);
1453   case ISD::FMA:                return visitFMA(N);
1454   case ISD::FDIV:               return visitFDIV(N);
1455   case ISD::FREM:               return visitFREM(N);
1456   case ISD::FSQRT:              return visitFSQRT(N);
1457   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1458   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1459   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1460   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1461   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1462   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1463   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1464   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1465   case ISD::FNEG:               return visitFNEG(N);
1466   case ISD::FABS:               return visitFABS(N);
1467   case ISD::FFLOOR:             return visitFFLOOR(N);
1468   case ISD::FMINNUM:            return visitFMINNUM(N);
1469   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1470   case ISD::FCEIL:              return visitFCEIL(N);
1471   case ISD::FTRUNC:             return visitFTRUNC(N);
1472   case ISD::BRCOND:             return visitBRCOND(N);
1473   case ISD::BR_CC:              return visitBR_CC(N);
1474   case ISD::LOAD:               return visitLOAD(N);
1475   case ISD::STORE:              return visitSTORE(N);
1476   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1477   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1478   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1479   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1480   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1481   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1482   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1483   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1484   case ISD::MGATHER:            return visitMGATHER(N);
1485   case ISD::MLOAD:              return visitMLOAD(N);
1486   case ISD::MSCATTER:           return visitMSCATTER(N);
1487   case ISD::MSTORE:             return visitMSTORE(N);
1488   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1489   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1490   }
1491   return SDValue();
1492 }
1493 
1494 SDValue DAGCombiner::combine(SDNode *N) {
1495   SDValue RV = visit(N);
1496 
1497   // If nothing happened, try a target-specific DAG combine.
1498   if (!RV.getNode()) {
1499     assert(N->getOpcode() != ISD::DELETED_NODE &&
1500            "Node was deleted but visit returned NULL!");
1501 
1502     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1503         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1504 
1505       // Expose the DAG combiner to the target combiner impls.
1506       TargetLowering::DAGCombinerInfo
1507         DagCombineInfo(DAG, Level, false, this);
1508 
1509       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1510     }
1511   }
1512 
1513   // If nothing happened still, try promoting the operation.
1514   if (!RV.getNode()) {
1515     switch (N->getOpcode()) {
1516     default: break;
1517     case ISD::ADD:
1518     case ISD::SUB:
1519     case ISD::MUL:
1520     case ISD::AND:
1521     case ISD::OR:
1522     case ISD::XOR:
1523       RV = PromoteIntBinOp(SDValue(N, 0));
1524       break;
1525     case ISD::SHL:
1526     case ISD::SRA:
1527     case ISD::SRL:
1528       RV = PromoteIntShiftOp(SDValue(N, 0));
1529       break;
1530     case ISD::SIGN_EXTEND:
1531     case ISD::ZERO_EXTEND:
1532     case ISD::ANY_EXTEND:
1533       RV = PromoteExtend(SDValue(N, 0));
1534       break;
1535     case ISD::LOAD:
1536       if (PromoteLoad(SDValue(N, 0)))
1537         RV = SDValue(N, 0);
1538       break;
1539     }
1540   }
1541 
1542   // If N is a commutative binary node, try commuting it to enable more
1543   // sdisel CSE.
1544   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1545       N->getNumValues() == 1) {
1546     SDValue N0 = N->getOperand(0);
1547     SDValue N1 = N->getOperand(1);
1548 
1549     // Constant operands are canonicalized to RHS.
1550     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1551       SDValue Ops[] = {N1, N0};
1552       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1553                                             N->getFlags());
1554       if (CSENode)
1555         return SDValue(CSENode, 0);
1556     }
1557   }
1558 
1559   return RV;
1560 }
1561 
1562 /// Given a node, return its input chain if it has one, otherwise return a null
1563 /// sd operand.
1564 static SDValue getInputChainForNode(SDNode *N) {
1565   if (unsigned NumOps = N->getNumOperands()) {
1566     if (N->getOperand(0).getValueType() == MVT::Other)
1567       return N->getOperand(0);
1568     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1569       return N->getOperand(NumOps-1);
1570     for (unsigned i = 1; i < NumOps-1; ++i)
1571       if (N->getOperand(i).getValueType() == MVT::Other)
1572         return N->getOperand(i);
1573   }
1574   return SDValue();
1575 }
1576 
1577 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1578   // If N has two operands, where one has an input chain equal to the other,
1579   // the 'other' chain is redundant.
1580   if (N->getNumOperands() == 2) {
1581     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1582       return N->getOperand(0);
1583     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1584       return N->getOperand(1);
1585   }
1586 
1587   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1588   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1589   SmallPtrSet<SDNode*, 16> SeenOps;
1590   bool Changed = false;             // If we should replace this token factor.
1591 
1592   // Start out with this token factor.
1593   TFs.push_back(N);
1594 
1595   // Iterate through token factors.  The TFs grows when new token factors are
1596   // encountered.
1597   for (unsigned i = 0; i < TFs.size(); ++i) {
1598     SDNode *TF = TFs[i];
1599 
1600     // Check each of the operands.
1601     for (const SDValue &Op : TF->op_values()) {
1602 
1603       switch (Op.getOpcode()) {
1604       case ISD::EntryToken:
1605         // Entry tokens don't need to be added to the list. They are
1606         // redundant.
1607         Changed = true;
1608         break;
1609 
1610       case ISD::TokenFactor:
1611         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1612           // Queue up for processing.
1613           TFs.push_back(Op.getNode());
1614           // Clean up in case the token factor is removed.
1615           AddToWorklist(Op.getNode());
1616           Changed = true;
1617           break;
1618         }
1619         LLVM_FALLTHROUGH;
1620 
1621       default:
1622         // Only add if it isn't already in the list.
1623         if (SeenOps.insert(Op.getNode()).second)
1624           Ops.push_back(Op);
1625         else
1626           Changed = true;
1627         break;
1628       }
1629     }
1630   }
1631 
1632   // Remove Nodes that are chained to another node in the list. Do so
1633   // by walking up chains breath-first stopping when we've seen
1634   // another operand. In general we must climb to the EntryNode, but we can exit
1635   // early if we find all remaining work is associated with just one operand as
1636   // no further pruning is possible.
1637 
1638   // List of nodes to search through and original Ops from which they originate.
1639   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1640   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1641   SmallPtrSet<SDNode *, 16> SeenChains;
1642   bool DidPruneOps = false;
1643 
1644   unsigned NumLeftToConsider = 0;
1645   for (const SDValue &Op : Ops) {
1646     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1647     OpWorkCount.push_back(1);
1648   }
1649 
1650   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1651     // If this is an Op, we can remove the op from the list. Remark any
1652     // search associated with it as from the current OpNumber.
1653     if (SeenOps.count(Op) != 0) {
1654       Changed = true;
1655       DidPruneOps = true;
1656       unsigned OrigOpNumber = 0;
1657       while (Ops[OrigOpNumber].getNode() != Op && OrigOpNumber < Ops.size())
1658         OrigOpNumber++;
1659       assert((OrigOpNumber != Ops.size()) &&
1660              "expected to find TokenFactor Operand");
1661       // Re-mark worklist from OrigOpNumber to OpNumber
1662       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1663         if (Worklist[i].second == OrigOpNumber) {
1664           Worklist[i].second = OpNumber;
1665         }
1666       }
1667       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1668       OpWorkCount[OrigOpNumber] = 0;
1669       NumLeftToConsider--;
1670     }
1671     // Add if it's a new chain
1672     if (SeenChains.insert(Op).second) {
1673       OpWorkCount[OpNumber]++;
1674       Worklist.push_back(std::make_pair(Op, OpNumber));
1675     }
1676   };
1677 
1678   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1679     // We need at least be consider at least 2 Ops to prune.
1680     if (NumLeftToConsider <= 1)
1681       break;
1682     auto CurNode = Worklist[i].first;
1683     auto CurOpNumber = Worklist[i].second;
1684     assert((OpWorkCount[CurOpNumber] > 0) &&
1685            "Node should not appear in worklist");
1686     switch (CurNode->getOpcode()) {
1687     case ISD::EntryToken:
1688       // Hitting EntryToken is the only way for the search to terminate without
1689       // hitting
1690       // another operand's search. Prevent us from marking this operand
1691       // considered.
1692       NumLeftToConsider++;
1693       break;
1694     case ISD::TokenFactor:
1695       for (const SDValue &Op : CurNode->op_values())
1696         AddToWorklist(i, Op.getNode(), CurOpNumber);
1697       break;
1698     case ISD::CopyFromReg:
1699     case ISD::CopyToReg:
1700       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1701       break;
1702     default:
1703       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1704         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1705       break;
1706     }
1707     OpWorkCount[CurOpNumber]--;
1708     if (OpWorkCount[CurOpNumber] == 0)
1709       NumLeftToConsider--;
1710   }
1711 
1712   SDValue Result;
1713 
1714   // If we've changed things around then replace token factor.
1715   if (Changed) {
1716     if (Ops.empty()) {
1717       // The entry token is the only possible outcome.
1718       Result = DAG.getEntryNode();
1719     } else {
1720       if (DidPruneOps) {
1721         SmallVector<SDValue, 8> PrunedOps;
1722         //
1723         for (const SDValue &Op : Ops) {
1724           if (SeenChains.count(Op.getNode()) == 0)
1725             PrunedOps.push_back(Op);
1726         }
1727         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1728       } else {
1729         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1730       }
1731     }
1732 
1733     // Add users to worklist, since we may introduce a lot of new
1734     // chained token factors while removing memory deps.
1735     return CombineTo(N, Result, true /*add to worklist*/);
1736   }
1737 
1738   return Result;
1739 }
1740 
1741 /// MERGE_VALUES can always be eliminated.
1742 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1743   WorklistRemover DeadNodes(*this);
1744   // Replacing results may cause a different MERGE_VALUES to suddenly
1745   // be CSE'd with N, and carry its uses with it. Iterate until no
1746   // uses remain, to ensure that the node can be safely deleted.
1747   // First add the users of this node to the work list so that they
1748   // can be tried again once they have new operands.
1749   AddUsersToWorklist(N);
1750   do {
1751     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1752       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1753   } while (!N->use_empty());
1754   deleteAndRecombine(N);
1755   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1756 }
1757 
1758 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1759 /// ConstantSDNode pointer else nullptr.
1760 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1761   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1762   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1763 }
1764 
1765 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1766   auto BinOpcode = BO->getOpcode();
1767   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1768           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1769           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1770           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1771           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1772           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1773           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1774           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1775           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1776          "Unexpected binary operator");
1777 
1778   // Bail out if any constants are opaque because we can't constant fold those.
1779   SDValue C1 = BO->getOperand(1);
1780   if (!isConstantOrConstantVector(C1, true) &&
1781       !isConstantFPBuildVectorOrConstantFP(C1))
1782     return SDValue();
1783 
1784   // Don't do this unless the old select is going away. We want to eliminate the
1785   // binary operator, not replace a binop with a select.
1786   // TODO: Handle ISD::SELECT_CC.
1787   SDValue Sel = BO->getOperand(0);
1788   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1789     return SDValue();
1790 
1791   SDValue CT = Sel.getOperand(1);
1792   if (!isConstantOrConstantVector(CT, true) &&
1793       !isConstantFPBuildVectorOrConstantFP(CT))
1794     return SDValue();
1795 
1796   SDValue CF = Sel.getOperand(2);
1797   if (!isConstantOrConstantVector(CF, true) &&
1798       !isConstantFPBuildVectorOrConstantFP(CF))
1799     return SDValue();
1800 
1801   // We have a select-of-constants followed by a binary operator with a
1802   // constant. Eliminate the binop by pulling the constant math into the select.
1803   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1804   EVT VT = Sel.getValueType();
1805   SDLoc DL(Sel);
1806   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1807   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1808           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1809          "Failed to constant fold a binop with constant operands");
1810 
1811   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1812   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1813           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1814          "Failed to constant fold a binop with constant operands");
1815 
1816   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1817 }
1818 
1819 SDValue DAGCombiner::visitADD(SDNode *N) {
1820   SDValue N0 = N->getOperand(0);
1821   SDValue N1 = N->getOperand(1);
1822   EVT VT = N0.getValueType();
1823   SDLoc DL(N);
1824 
1825   // fold vector ops
1826   if (VT.isVector()) {
1827     if (SDValue FoldedVOp = SimplifyVBinOp(N))
1828       return FoldedVOp;
1829 
1830     // fold (add x, 0) -> x, vector edition
1831     if (ISD::isBuildVectorAllZeros(N1.getNode()))
1832       return N0;
1833     if (ISD::isBuildVectorAllZeros(N0.getNode()))
1834       return N1;
1835   }
1836 
1837   // fold (add x, undef) -> undef
1838   if (N0.isUndef())
1839     return N0;
1840 
1841   if (N1.isUndef())
1842     return N1;
1843 
1844   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1845     // canonicalize constant to RHS
1846     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1847       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1848     // fold (add c1, c2) -> c1+c2
1849     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1850                                       N1.getNode());
1851   }
1852 
1853   // fold (add x, 0) -> x
1854   if (isNullConstant(N1))
1855     return N0;
1856 
1857   // fold ((c1-A)+c2) -> (c1+c2)-A
1858   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1859     if (N0.getOpcode() == ISD::SUB)
1860       if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1861         return DAG.getNode(ISD::SUB, DL, VT,
1862                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1863                            N0.getOperand(1));
1864       }
1865   }
1866 
1867   if (SDValue NewSel = foldBinOpIntoSelect(N))
1868     return NewSel;
1869 
1870   // reassociate add
1871   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1872     return RADD;
1873 
1874   // fold ((0-A) + B) -> B-A
1875   if (N0.getOpcode() == ISD::SUB &&
1876       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
1877     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
1878 
1879   // fold (A + (0-B)) -> A-B
1880   if (N1.getOpcode() == ISD::SUB &&
1881       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
1882     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
1883 
1884   // fold (A+(B-A)) -> B
1885   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1886     return N1.getOperand(0);
1887 
1888   // fold ((B-A)+A) -> B
1889   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1890     return N0.getOperand(0);
1891 
1892   // fold (A+(B-(A+C))) to (B-C)
1893   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1894       N0 == N1.getOperand(1).getOperand(0))
1895     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1896                        N1.getOperand(1).getOperand(1));
1897 
1898   // fold (A+(B-(C+A))) to (B-C)
1899   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1900       N0 == N1.getOperand(1).getOperand(1))
1901     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1902                        N1.getOperand(1).getOperand(0));
1903 
1904   // fold (A+((B-A)+or-C)) to (B+or-C)
1905   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1906       N1.getOperand(0).getOpcode() == ISD::SUB &&
1907       N0 == N1.getOperand(0).getOperand(1))
1908     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
1909                        N1.getOperand(1));
1910 
1911   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1912   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1913     SDValue N00 = N0.getOperand(0);
1914     SDValue N01 = N0.getOperand(1);
1915     SDValue N10 = N1.getOperand(0);
1916     SDValue N11 = N1.getOperand(1);
1917 
1918     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
1919       return DAG.getNode(ISD::SUB, DL, VT,
1920                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1921                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1922   }
1923 
1924   if (SimplifyDemandedBits(SDValue(N, 0)))
1925     return SDValue(N, 0);
1926 
1927   // fold (a+b) -> (a|b) iff a and b share no bits.
1928   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
1929       VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
1930     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
1931 
1932   if (SDValue Combined = visitADDLike(N0, N1, N))
1933     return Combined;
1934 
1935   if (SDValue Combined = visitADDLike(N1, N0, N))
1936     return Combined;
1937 
1938   return SDValue();
1939 }
1940 
1941 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
1942   EVT VT = N0.getValueType();
1943   SDLoc DL(LocReference);
1944 
1945   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1946   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
1947       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
1948     return DAG.getNode(ISD::SUB, DL, VT, N0,
1949                        DAG.getNode(ISD::SHL, DL, VT,
1950                                    N1.getOperand(0).getOperand(1),
1951                                    N1.getOperand(1)));
1952 
1953   if (N1.getOpcode() == ISD::AND) {
1954     SDValue AndOp0 = N1.getOperand(0);
1955     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1956     unsigned DestBits = VT.getScalarSizeInBits();
1957 
1958     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1959     // and similar xforms where the inner op is either ~0 or 0.
1960     if (NumSignBits == DestBits &&
1961         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
1962       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
1963   }
1964 
1965   // add (sext i1), X -> sub X, (zext i1)
1966   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1967       N0.getOperand(0).getValueType() == MVT::i1 &&
1968       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1969     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1970     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1971   }
1972 
1973   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
1974   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1975     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1976     if (TN->getVT() == MVT::i1) {
1977       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1978                                  DAG.getConstant(1, DL, VT));
1979       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
1980     }
1981   }
1982 
1983   return SDValue();
1984 }
1985 
1986 SDValue DAGCombiner::visitADDC(SDNode *N) {
1987   SDValue N0 = N->getOperand(0);
1988   SDValue N1 = N->getOperand(1);
1989   EVT VT = N0.getValueType();
1990   SDLoc DL(N);
1991 
1992   // If the flag result is dead, turn this into an ADD.
1993   if (!N->hasAnyUseOfValue(1))
1994     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
1995                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
1996 
1997   // canonicalize constant to RHS.
1998   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1999   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2000   if (N0C && !N1C)
2001     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2002 
2003   // fold (addc x, 0) -> x + no carry out
2004   if (isNullConstant(N1))
2005     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2006                                         DL, MVT::Glue));
2007 
2008   // If it cannot overflow, transform into an add.
2009   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2010     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2011                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2012 
2013   return SDValue();
2014 }
2015 
2016 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2017   SDValue N0 = N->getOperand(0);
2018   SDValue N1 = N->getOperand(1);
2019   EVT VT = N0.getValueType();
2020   if (VT.isVector())
2021     return SDValue();
2022 
2023   EVT CarryVT = N->getValueType(1);
2024   SDLoc DL(N);
2025 
2026   // If the flag result is dead, turn this into an ADD.
2027   if (!N->hasAnyUseOfValue(1))
2028     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2029                      DAG.getUNDEF(CarryVT));
2030 
2031   // canonicalize constant to RHS.
2032   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2033   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2034   if (N0C && !N1C)
2035     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2036 
2037   // fold (uaddo x, 0) -> x + no carry out
2038   if (isNullConstant(N1))
2039     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2040 
2041   // If it cannot overflow, transform into an add.
2042   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2043     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2044                      DAG.getConstant(0, DL, CarryVT));
2045 
2046   return SDValue();
2047 }
2048 
2049 SDValue DAGCombiner::visitADDE(SDNode *N) {
2050   SDValue N0 = N->getOperand(0);
2051   SDValue N1 = N->getOperand(1);
2052   SDValue CarryIn = N->getOperand(2);
2053 
2054   // canonicalize constant to RHS
2055   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2056   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2057   if (N0C && !N1C)
2058     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2059                        N1, N0, CarryIn);
2060 
2061   // fold (adde x, y, false) -> (addc x, y)
2062   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2063     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2064 
2065   return SDValue();
2066 }
2067 
2068 // Since it may not be valid to emit a fold to zero for vector initializers
2069 // check if we can before folding.
2070 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2071                              SelectionDAG &DAG, bool LegalOperations,
2072                              bool LegalTypes) {
2073   if (!VT.isVector())
2074     return DAG.getConstant(0, DL, VT);
2075   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2076     return DAG.getConstant(0, DL, VT);
2077   return SDValue();
2078 }
2079 
2080 SDValue DAGCombiner::visitSUB(SDNode *N) {
2081   SDValue N0 = N->getOperand(0);
2082   SDValue N1 = N->getOperand(1);
2083   EVT VT = N0.getValueType();
2084   SDLoc DL(N);
2085 
2086   // fold vector ops
2087   if (VT.isVector()) {
2088     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2089       return FoldedVOp;
2090 
2091     // fold (sub x, 0) -> x, vector edition
2092     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2093       return N0;
2094   }
2095 
2096   // fold (sub x, x) -> 0
2097   // FIXME: Refactor this and xor and other similar operations together.
2098   if (N0 == N1)
2099     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2100   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2101       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2102     // fold (sub c1, c2) -> c1-c2
2103     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2104                                       N1.getNode());
2105   }
2106 
2107   if (SDValue NewSel = foldBinOpIntoSelect(N))
2108     return NewSel;
2109 
2110   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2111 
2112   // fold (sub x, c) -> (add x, -c)
2113   if (N1C) {
2114     return DAG.getNode(ISD::ADD, DL, VT, N0,
2115                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2116   }
2117 
2118   if (isNullConstantOrNullSplatConstant(N0)) {
2119     unsigned BitWidth = VT.getScalarSizeInBits();
2120     // Right-shifting everything out but the sign bit followed by negation is
2121     // the same as flipping arithmetic/logical shift type without the negation:
2122     // -(X >>u 31) -> (X >>s 31)
2123     // -(X >>s 31) -> (X >>u 31)
2124     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2125       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2126       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2127         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2128         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2129           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2130       }
2131     }
2132 
2133     // 0 - X --> 0 if the sub is NUW.
2134     if (N->getFlags()->hasNoUnsignedWrap())
2135       return N0;
2136 
2137     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
2138       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2139       // N1 must be 0 because negating the minimum signed value is undefined.
2140       if (N->getFlags()->hasNoSignedWrap())
2141         return N0;
2142 
2143       // 0 - X --> X if X is 0 or the minimum signed value.
2144       return N1;
2145     }
2146   }
2147 
2148   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2149   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2150     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2151 
2152   // fold A-(A-B) -> B
2153   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2154     return N1.getOperand(1);
2155 
2156   // fold (A+B)-A -> B
2157   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2158     return N0.getOperand(1);
2159 
2160   // fold (A+B)-B -> A
2161   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2162     return N0.getOperand(0);
2163 
2164   // fold C2-(A+C1) -> (C2-C1)-A
2165   if (N1.getOpcode() == ISD::ADD) {
2166     SDValue N11 = N1.getOperand(1);
2167     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2168         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2169       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2170       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2171     }
2172   }
2173 
2174   // fold ((A+(B+or-C))-B) -> A+or-C
2175   if (N0.getOpcode() == ISD::ADD &&
2176       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2177        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2178       N0.getOperand(1).getOperand(0) == N1)
2179     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2180                        N0.getOperand(1).getOperand(1));
2181 
2182   // fold ((A+(C+B))-B) -> A+C
2183   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2184       N0.getOperand(1).getOperand(1) == N1)
2185     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2186                        N0.getOperand(1).getOperand(0));
2187 
2188   // fold ((A-(B-C))-C) -> A-B
2189   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2190       N0.getOperand(1).getOperand(1) == N1)
2191     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2192                        N0.getOperand(1).getOperand(0));
2193 
2194   // If either operand of a sub is undef, the result is undef
2195   if (N0.isUndef())
2196     return N0;
2197   if (N1.isUndef())
2198     return N1;
2199 
2200   // If the relocation model supports it, consider symbol offsets.
2201   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2202     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2203       // fold (sub Sym, c) -> Sym-c
2204       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2205         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2206                                     GA->getOffset() -
2207                                         (uint64_t)N1C->getSExtValue());
2208       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2209       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2210         if (GA->getGlobal() == GB->getGlobal())
2211           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2212                                  DL, VT);
2213     }
2214 
2215   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2216   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2217     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2218     if (TN->getVT() == MVT::i1) {
2219       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2220                                  DAG.getConstant(1, DL, VT));
2221       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2222     }
2223   }
2224 
2225   return SDValue();
2226 }
2227 
2228 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2229   SDValue N0 = N->getOperand(0);
2230   SDValue N1 = N->getOperand(1);
2231   EVT VT = N0.getValueType();
2232   SDLoc DL(N);
2233 
2234   // If the flag result is dead, turn this into an SUB.
2235   if (!N->hasAnyUseOfValue(1))
2236     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2237                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2238 
2239   // fold (subc x, x) -> 0 + no borrow
2240   if (N0 == N1)
2241     return CombineTo(N, DAG.getConstant(0, DL, VT),
2242                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2243 
2244   // fold (subc x, 0) -> x + no borrow
2245   if (isNullConstant(N1))
2246     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2247 
2248   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2249   if (isAllOnesConstant(N0))
2250     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2251                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2252 
2253   return SDValue();
2254 }
2255 
2256 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2257   SDValue N0 = N->getOperand(0);
2258   SDValue N1 = N->getOperand(1);
2259   EVT VT = N0.getValueType();
2260   if (VT.isVector())
2261     return SDValue();
2262 
2263   EVT CarryVT = N->getValueType(1);
2264   SDLoc DL(N);
2265 
2266   // If the flag result is dead, turn this into an SUB.
2267   if (!N->hasAnyUseOfValue(1))
2268     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2269                      DAG.getUNDEF(CarryVT));
2270 
2271   // fold (usubo x, x) -> 0 + no borrow
2272   if (N0 == N1)
2273     return CombineTo(N, DAG.getConstant(0, DL, VT),
2274                      DAG.getConstant(0, DL, CarryVT));
2275 
2276   // fold (usubo x, 0) -> x + no borrow
2277   if (isNullConstant(N1))
2278     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2279 
2280   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2281   if (isAllOnesConstant(N0))
2282     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2283                      DAG.getConstant(0, DL, CarryVT));
2284 
2285   return SDValue();
2286 }
2287 
2288 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2289   SDValue N0 = N->getOperand(0);
2290   SDValue N1 = N->getOperand(1);
2291   SDValue CarryIn = N->getOperand(2);
2292 
2293   // fold (sube x, y, false) -> (subc x, y)
2294   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2295     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2296 
2297   return SDValue();
2298 }
2299 
2300 SDValue DAGCombiner::visitMUL(SDNode *N) {
2301   SDValue N0 = N->getOperand(0);
2302   SDValue N1 = N->getOperand(1);
2303   EVT VT = N0.getValueType();
2304 
2305   // fold (mul x, undef) -> 0
2306   if (N0.isUndef() || N1.isUndef())
2307     return DAG.getConstant(0, SDLoc(N), VT);
2308 
2309   bool N0IsConst = false;
2310   bool N1IsConst = false;
2311   bool N1IsOpaqueConst = false;
2312   bool N0IsOpaqueConst = false;
2313   APInt ConstValue0, ConstValue1;
2314   // fold vector ops
2315   if (VT.isVector()) {
2316     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2317       return FoldedVOp;
2318 
2319     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2320     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2321   } else {
2322     N0IsConst = isa<ConstantSDNode>(N0);
2323     if (N0IsConst) {
2324       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2325       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2326     }
2327     N1IsConst = isa<ConstantSDNode>(N1);
2328     if (N1IsConst) {
2329       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2330       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2331     }
2332   }
2333 
2334   // fold (mul c1, c2) -> c1*c2
2335   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2336     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2337                                       N0.getNode(), N1.getNode());
2338 
2339   // canonicalize constant to RHS (vector doesn't have to splat)
2340   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2341      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2342     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2343   // fold (mul x, 0) -> 0
2344   if (N1IsConst && ConstValue1 == 0)
2345     return N1;
2346   // We require a splat of the entire scalar bit width for non-contiguous
2347   // bit patterns.
2348   bool IsFullSplat =
2349     ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2350   // fold (mul x, 1) -> x
2351   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
2352     return N0;
2353 
2354   if (SDValue NewSel = foldBinOpIntoSelect(N))
2355     return NewSel;
2356 
2357   // fold (mul x, -1) -> 0-x
2358   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2359     SDLoc DL(N);
2360     return DAG.getNode(ISD::SUB, DL, VT,
2361                        DAG.getConstant(0, DL, VT), N0);
2362   }
2363   // fold (mul x, (1 << c)) -> x << c
2364   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2365       IsFullSplat) {
2366     SDLoc DL(N);
2367     return DAG.getNode(ISD::SHL, DL, VT, N0,
2368                        DAG.getConstant(ConstValue1.logBase2(), DL,
2369                                        getShiftAmountTy(N0.getValueType())));
2370   }
2371   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2372   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2373       IsFullSplat) {
2374     unsigned Log2Val = (-ConstValue1).logBase2();
2375     SDLoc DL(N);
2376     // FIXME: If the input is something that is easily negated (e.g. a
2377     // single-use add), we should put the negate there.
2378     return DAG.getNode(ISD::SUB, DL, VT,
2379                        DAG.getConstant(0, DL, VT),
2380                        DAG.getNode(ISD::SHL, DL, VT, N0,
2381                             DAG.getConstant(Log2Val, DL,
2382                                       getShiftAmountTy(N0.getValueType()))));
2383   }
2384 
2385   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2386   if (N0.getOpcode() == ISD::SHL &&
2387       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2388       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2389     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2390     if (isConstantOrConstantVector(C3))
2391       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2392   }
2393 
2394   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2395   // use.
2396   {
2397     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2398 
2399     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2400     if (N0.getOpcode() == ISD::SHL &&
2401         isConstantOrConstantVector(N0.getOperand(1)) &&
2402         N0.getNode()->hasOneUse()) {
2403       Sh = N0; Y = N1;
2404     } else if (N1.getOpcode() == ISD::SHL &&
2405                isConstantOrConstantVector(N1.getOperand(1)) &&
2406                N1.getNode()->hasOneUse()) {
2407       Sh = N1; Y = N0;
2408     }
2409 
2410     if (Sh.getNode()) {
2411       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2412       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2413     }
2414   }
2415 
2416   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2417   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2418       N0.getOpcode() == ISD::ADD &&
2419       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2420       isMulAddWithConstProfitable(N, N0, N1))
2421       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2422                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2423                                      N0.getOperand(0), N1),
2424                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2425                                      N0.getOperand(1), N1));
2426 
2427   // reassociate mul
2428   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2429     return RMUL;
2430 
2431   return SDValue();
2432 }
2433 
2434 /// Return true if divmod libcall is available.
2435 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2436                                      const TargetLowering &TLI) {
2437   RTLIB::Libcall LC;
2438   EVT NodeType = Node->getValueType(0);
2439   if (!NodeType.isSimple())
2440     return false;
2441   switch (NodeType.getSimpleVT().SimpleTy) {
2442   default: return false; // No libcall for vector types.
2443   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2444   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2445   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2446   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2447   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2448   }
2449 
2450   return TLI.getLibcallName(LC) != nullptr;
2451 }
2452 
2453 /// Issue divrem if both quotient and remainder are needed.
2454 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2455   if (Node->use_empty())
2456     return SDValue(); // This is a dead node, leave it alone.
2457 
2458   unsigned Opcode = Node->getOpcode();
2459   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2460   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2461 
2462   // DivMod lib calls can still work on non-legal types if using lib-calls.
2463   EVT VT = Node->getValueType(0);
2464   if (VT.isVector() || !VT.isInteger())
2465     return SDValue();
2466 
2467   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2468     return SDValue();
2469 
2470   // If DIVREM is going to get expanded into a libcall,
2471   // but there is no libcall available, then don't combine.
2472   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2473       !isDivRemLibcallAvailable(Node, isSigned, TLI))
2474     return SDValue();
2475 
2476   // If div is legal, it's better to do the normal expansion
2477   unsigned OtherOpcode = 0;
2478   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2479     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2480     if (TLI.isOperationLegalOrCustom(Opcode, VT))
2481       return SDValue();
2482   } else {
2483     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2484     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2485       return SDValue();
2486   }
2487 
2488   SDValue Op0 = Node->getOperand(0);
2489   SDValue Op1 = Node->getOperand(1);
2490   SDValue combined;
2491   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2492          UE = Op0.getNode()->use_end(); UI != UE;) {
2493     SDNode *User = *UI++;
2494     if (User == Node || User->use_empty())
2495       continue;
2496     // Convert the other matching node(s), too;
2497     // otherwise, the DIVREM may get target-legalized into something
2498     // target-specific that we won't be able to recognize.
2499     unsigned UserOpc = User->getOpcode();
2500     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2501         User->getOperand(0) == Op0 &&
2502         User->getOperand(1) == Op1) {
2503       if (!combined) {
2504         if (UserOpc == OtherOpcode) {
2505           SDVTList VTs = DAG.getVTList(VT, VT);
2506           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2507         } else if (UserOpc == DivRemOpc) {
2508           combined = SDValue(User, 0);
2509         } else {
2510           assert(UserOpc == Opcode);
2511           continue;
2512         }
2513       }
2514       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2515         CombineTo(User, combined);
2516       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2517         CombineTo(User, combined.getValue(1));
2518     }
2519   }
2520   return combined;
2521 }
2522 
2523 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2524   SDValue N0 = N->getOperand(0);
2525   SDValue N1 = N->getOperand(1);
2526   EVT VT = N->getValueType(0);
2527   SDLoc DL(N);
2528 
2529   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2530     return DAG.getUNDEF(VT);
2531 
2532   // undef / X -> 0
2533   // undef % X -> 0
2534   if (N0.isUndef())
2535     return DAG.getConstant(0, DL, VT);
2536 
2537   return SDValue();
2538 }
2539 
2540 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2541   SDValue N0 = N->getOperand(0);
2542   SDValue N1 = N->getOperand(1);
2543   EVT VT = N->getValueType(0);
2544 
2545   // fold vector ops
2546   if (VT.isVector())
2547     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2548       return FoldedVOp;
2549 
2550   SDLoc DL(N);
2551 
2552   // fold (sdiv c1, c2) -> c1/c2
2553   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2554   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2555   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2556     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2557   // fold (sdiv X, 1) -> X
2558   if (N1C && N1C->isOne())
2559     return N0;
2560   // fold (sdiv X, -1) -> 0-X
2561   if (N1C && N1C->isAllOnesValue())
2562     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2563 
2564   if (SDValue V = simplifyDivRem(N, DAG))
2565     return V;
2566 
2567   if (SDValue NewSel = foldBinOpIntoSelect(N))
2568     return NewSel;
2569 
2570   // If we know the sign bits of both operands are zero, strength reduce to a
2571   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2572   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2573     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2574 
2575   // fold (sdiv X, pow2) -> simple ops after legalize
2576   // FIXME: We check for the exact bit here because the generic lowering gives
2577   // better results in that case. The target-specific lowering should learn how
2578   // to handle exact sdivs efficiently.
2579   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2580       !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
2581       (N1C->getAPIntValue().isPowerOf2() ||
2582        (-N1C->getAPIntValue()).isPowerOf2())) {
2583     // Target-specific implementation of sdiv x, pow2.
2584     if (SDValue Res = BuildSDIVPow2(N))
2585       return Res;
2586 
2587     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2588 
2589     // Splat the sign bit into the register
2590     SDValue SGN =
2591         DAG.getNode(ISD::SRA, DL, VT, N0,
2592                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2593                                     getShiftAmountTy(N0.getValueType())));
2594     AddToWorklist(SGN.getNode());
2595 
2596     // Add (N0 < 0) ? abs2 - 1 : 0;
2597     SDValue SRL =
2598         DAG.getNode(ISD::SRL, DL, VT, SGN,
2599                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2600                                     getShiftAmountTy(SGN.getValueType())));
2601     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2602     AddToWorklist(SRL.getNode());
2603     AddToWorklist(ADD.getNode());    // Divide by pow2
2604     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2605                   DAG.getConstant(lg2, DL,
2606                                   getShiftAmountTy(ADD.getValueType())));
2607 
2608     // If we're dividing by a positive value, we're done.  Otherwise, we must
2609     // negate the result.
2610     if (N1C->getAPIntValue().isNonNegative())
2611       return SRA;
2612 
2613     AddToWorklist(SRA.getNode());
2614     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2615   }
2616 
2617   // If integer divide is expensive and we satisfy the requirements, emit an
2618   // alternate sequence.  Targets may check function attributes for size/speed
2619   // trade-offs.
2620   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2621   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2622     if (SDValue Op = BuildSDIV(N))
2623       return Op;
2624 
2625   // sdiv, srem -> sdivrem
2626   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2627   // true.  Otherwise, we break the simplification logic in visitREM().
2628   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2629     if (SDValue DivRem = useDivRem(N))
2630         return DivRem;
2631 
2632   return SDValue();
2633 }
2634 
2635 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2636   SDValue N0 = N->getOperand(0);
2637   SDValue N1 = N->getOperand(1);
2638   EVT VT = N->getValueType(0);
2639 
2640   // fold vector ops
2641   if (VT.isVector())
2642     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2643       return FoldedVOp;
2644 
2645   SDLoc DL(N);
2646 
2647   // fold (udiv c1, c2) -> c1/c2
2648   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2649   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2650   if (N0C && N1C)
2651     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2652                                                     N0C, N1C))
2653       return Folded;
2654 
2655   if (SDValue V = simplifyDivRem(N, DAG))
2656     return V;
2657 
2658   if (SDValue NewSel = foldBinOpIntoSelect(N))
2659     return NewSel;
2660 
2661   // fold (udiv x, (1 << c)) -> x >>u c
2662   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2663       DAG.isKnownToBeAPowerOfTwo(N1)) {
2664     SDValue LogBase2 = BuildLogBase2(N1, DL);
2665     AddToWorklist(LogBase2.getNode());
2666 
2667     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2668     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2669     AddToWorklist(Trunc.getNode());
2670     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2671   }
2672 
2673   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2674   if (N1.getOpcode() == ISD::SHL) {
2675     SDValue N10 = N1.getOperand(0);
2676     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2677         DAG.isKnownToBeAPowerOfTwo(N10)) {
2678       SDValue LogBase2 = BuildLogBase2(N10, DL);
2679       AddToWorklist(LogBase2.getNode());
2680 
2681       EVT ADDVT = N1.getOperand(1).getValueType();
2682       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2683       AddToWorklist(Trunc.getNode());
2684       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2685       AddToWorklist(Add.getNode());
2686       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2687     }
2688   }
2689 
2690   // fold (udiv x, c) -> alternate
2691   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2692   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2693     if (SDValue Op = BuildUDIV(N))
2694       return Op;
2695 
2696   // sdiv, srem -> sdivrem
2697   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2698   // true.  Otherwise, we break the simplification logic in visitREM().
2699   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2700     if (SDValue DivRem = useDivRem(N))
2701         return DivRem;
2702 
2703   return SDValue();
2704 }
2705 
2706 // handles ISD::SREM and ISD::UREM
2707 SDValue DAGCombiner::visitREM(SDNode *N) {
2708   unsigned Opcode = N->getOpcode();
2709   SDValue N0 = N->getOperand(0);
2710   SDValue N1 = N->getOperand(1);
2711   EVT VT = N->getValueType(0);
2712   bool isSigned = (Opcode == ISD::SREM);
2713   SDLoc DL(N);
2714 
2715   // fold (rem c1, c2) -> c1%c2
2716   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2717   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2718   if (N0C && N1C)
2719     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2720       return Folded;
2721 
2722   if (SDValue V = simplifyDivRem(N, DAG))
2723     return V;
2724 
2725   if (SDValue NewSel = foldBinOpIntoSelect(N))
2726     return NewSel;
2727 
2728   if (isSigned) {
2729     // If we know the sign bits of both operands are zero, strength reduce to a
2730     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2731     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2732       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2733   } else {
2734     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
2735     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
2736       // fold (urem x, pow2) -> (and x, pow2-1)
2737       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2738       AddToWorklist(Add.getNode());
2739       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2740     }
2741     if (N1.getOpcode() == ISD::SHL &&
2742         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
2743       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2744       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2745       AddToWorklist(Add.getNode());
2746       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2747     }
2748   }
2749 
2750   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2751 
2752   // If X/C can be simplified by the division-by-constant logic, lower
2753   // X%C to the equivalent of X-X/C*C.
2754   // To avoid mangling nodes, this simplification requires that the combine()
2755   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
2756   // against this by skipping the simplification if isIntDivCheap().  When
2757   // div is not cheap, combine will not return a DIVREM.  Regardless,
2758   // checking cheapness here makes sense since the simplification results in
2759   // fatter code.
2760   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
2761     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2762     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
2763     AddToWorklist(Div.getNode());
2764     SDValue OptimizedDiv = combine(Div.getNode());
2765     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2766       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
2767              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
2768       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
2769       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
2770       AddToWorklist(Mul.getNode());
2771       return Sub;
2772     }
2773   }
2774 
2775   // sdiv, srem -> sdivrem
2776   if (SDValue DivRem = useDivRem(N))
2777     return DivRem.getValue(1);
2778 
2779   return SDValue();
2780 }
2781 
2782 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2783   SDValue N0 = N->getOperand(0);
2784   SDValue N1 = N->getOperand(1);
2785   EVT VT = N->getValueType(0);
2786   SDLoc DL(N);
2787 
2788   // fold (mulhs x, 0) -> 0
2789   if (isNullConstant(N1))
2790     return N1;
2791   // fold (mulhs x, 1) -> (sra x, size(x)-1)
2792   if (isOneConstant(N1)) {
2793     SDLoc DL(N);
2794     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2795                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
2796                                        getShiftAmountTy(N0.getValueType())));
2797   }
2798   // fold (mulhs x, undef) -> 0
2799   if (N0.isUndef() || N1.isUndef())
2800     return DAG.getConstant(0, SDLoc(N), VT);
2801 
2802   // If the type twice as wide is legal, transform the mulhs to a wider multiply
2803   // plus a shift.
2804   if (VT.isSimple() && !VT.isVector()) {
2805     MVT Simple = VT.getSimpleVT();
2806     unsigned SimpleSize = Simple.getSizeInBits();
2807     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2808     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2809       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2810       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2811       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2812       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2813             DAG.getConstant(SimpleSize, DL,
2814                             getShiftAmountTy(N1.getValueType())));
2815       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2816     }
2817   }
2818 
2819   return SDValue();
2820 }
2821 
2822 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2823   SDValue N0 = N->getOperand(0);
2824   SDValue N1 = N->getOperand(1);
2825   EVT VT = N->getValueType(0);
2826   SDLoc DL(N);
2827 
2828   // fold (mulhu x, 0) -> 0
2829   if (isNullConstant(N1))
2830     return N1;
2831   // fold (mulhu x, 1) -> 0
2832   if (isOneConstant(N1))
2833     return DAG.getConstant(0, DL, N0.getValueType());
2834   // fold (mulhu x, undef) -> 0
2835   if (N0.isUndef() || N1.isUndef())
2836     return DAG.getConstant(0, DL, VT);
2837 
2838   // If the type twice as wide is legal, transform the mulhu to a wider multiply
2839   // plus a shift.
2840   if (VT.isSimple() && !VT.isVector()) {
2841     MVT Simple = VT.getSimpleVT();
2842     unsigned SimpleSize = Simple.getSizeInBits();
2843     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2844     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2845       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2846       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2847       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2848       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2849             DAG.getConstant(SimpleSize, DL,
2850                             getShiftAmountTy(N1.getValueType())));
2851       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2852     }
2853   }
2854 
2855   return SDValue();
2856 }
2857 
2858 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
2859 /// give the opcodes for the two computations that are being performed. Return
2860 /// true if a simplification was made.
2861 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2862                                                 unsigned HiOp) {
2863   // If the high half is not needed, just compute the low half.
2864   bool HiExists = N->hasAnyUseOfValue(1);
2865   if (!HiExists &&
2866       (!LegalOperations ||
2867        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
2868     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2869     return CombineTo(N, Res, Res);
2870   }
2871 
2872   // If the low half is not needed, just compute the high half.
2873   bool LoExists = N->hasAnyUseOfValue(0);
2874   if (!LoExists &&
2875       (!LegalOperations ||
2876        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2877     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2878     return CombineTo(N, Res, Res);
2879   }
2880 
2881   // If both halves are used, return as it is.
2882   if (LoExists && HiExists)
2883     return SDValue();
2884 
2885   // If the two computed results can be simplified separately, separate them.
2886   if (LoExists) {
2887     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2888     AddToWorklist(Lo.getNode());
2889     SDValue LoOpt = combine(Lo.getNode());
2890     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2891         (!LegalOperations ||
2892          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2893       return CombineTo(N, LoOpt, LoOpt);
2894   }
2895 
2896   if (HiExists) {
2897     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2898     AddToWorklist(Hi.getNode());
2899     SDValue HiOpt = combine(Hi.getNode());
2900     if (HiOpt.getNode() && HiOpt != Hi &&
2901         (!LegalOperations ||
2902          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2903       return CombineTo(N, HiOpt, HiOpt);
2904   }
2905 
2906   return SDValue();
2907 }
2908 
2909 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2910   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
2911     return Res;
2912 
2913   EVT VT = N->getValueType(0);
2914   SDLoc DL(N);
2915 
2916   // If the type is twice as wide is legal, transform the mulhu to a wider
2917   // multiply plus a shift.
2918   if (VT.isSimple() && !VT.isVector()) {
2919     MVT Simple = VT.getSimpleVT();
2920     unsigned SimpleSize = Simple.getSizeInBits();
2921     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2922     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2923       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2924       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2925       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2926       // Compute the high part as N1.
2927       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2928             DAG.getConstant(SimpleSize, DL,
2929                             getShiftAmountTy(Lo.getValueType())));
2930       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2931       // Compute the low part as N0.
2932       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2933       return CombineTo(N, Lo, Hi);
2934     }
2935   }
2936 
2937   return SDValue();
2938 }
2939 
2940 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2941   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
2942     return Res;
2943 
2944   EVT VT = N->getValueType(0);
2945   SDLoc DL(N);
2946 
2947   // If the type is twice as wide is legal, transform the mulhu to a wider
2948   // multiply plus a shift.
2949   if (VT.isSimple() && !VT.isVector()) {
2950     MVT Simple = VT.getSimpleVT();
2951     unsigned SimpleSize = Simple.getSizeInBits();
2952     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2953     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2954       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2955       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2956       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2957       // Compute the high part as N1.
2958       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2959             DAG.getConstant(SimpleSize, DL,
2960                             getShiftAmountTy(Lo.getValueType())));
2961       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2962       // Compute the low part as N0.
2963       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2964       return CombineTo(N, Lo, Hi);
2965     }
2966   }
2967 
2968   return SDValue();
2969 }
2970 
2971 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2972   // (smulo x, 2) -> (saddo x, x)
2973   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2974     if (C2->getAPIntValue() == 2)
2975       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2976                          N->getOperand(0), N->getOperand(0));
2977 
2978   return SDValue();
2979 }
2980 
2981 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2982   // (umulo x, 2) -> (uaddo x, x)
2983   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2984     if (C2->getAPIntValue() == 2)
2985       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2986                          N->getOperand(0), N->getOperand(0));
2987 
2988   return SDValue();
2989 }
2990 
2991 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
2992   SDValue N0 = N->getOperand(0);
2993   SDValue N1 = N->getOperand(1);
2994   EVT VT = N0.getValueType();
2995 
2996   // fold vector ops
2997   if (VT.isVector())
2998     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2999       return FoldedVOp;
3000 
3001   // fold (add c1, c2) -> c1+c2
3002   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3003   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3004   if (N0C && N1C)
3005     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3006 
3007   // canonicalize constant to RHS
3008   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3009      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3010     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3011 
3012   return SDValue();
3013 }
3014 
3015 /// If this is a binary operator with two operands of the same opcode, try to
3016 /// simplify it.
3017 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3018   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3019   EVT VT = N0.getValueType();
3020   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3021 
3022   // Bail early if none of these transforms apply.
3023   if (N0.getNumOperands() == 0) return SDValue();
3024 
3025   // For each of OP in AND/OR/XOR:
3026   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3027   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3028   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3029   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3030   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3031   //
3032   // do not sink logical op inside of a vector extend, since it may combine
3033   // into a vsetcc.
3034   EVT Op0VT = N0.getOperand(0).getValueType();
3035   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3036        N0.getOpcode() == ISD::SIGN_EXTEND ||
3037        N0.getOpcode() == ISD::BSWAP ||
3038        // Avoid infinite looping with PromoteIntBinOp.
3039        (N0.getOpcode() == ISD::ANY_EXTEND &&
3040         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3041        (N0.getOpcode() == ISD::TRUNCATE &&
3042         (!TLI.isZExtFree(VT, Op0VT) ||
3043          !TLI.isTruncateFree(Op0VT, VT)) &&
3044         TLI.isTypeLegal(Op0VT))) &&
3045       !VT.isVector() &&
3046       Op0VT == N1.getOperand(0).getValueType() &&
3047       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3048     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3049                                  N0.getOperand(0).getValueType(),
3050                                  N0.getOperand(0), N1.getOperand(0));
3051     AddToWorklist(ORNode.getNode());
3052     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3053   }
3054 
3055   // For each of OP in SHL/SRL/SRA/AND...
3056   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3057   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3058   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3059   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3060        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3061       N0.getOperand(1) == N1.getOperand(1)) {
3062     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3063                                  N0.getOperand(0).getValueType(),
3064                                  N0.getOperand(0), N1.getOperand(0));
3065     AddToWorklist(ORNode.getNode());
3066     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3067                        ORNode, N0.getOperand(1));
3068   }
3069 
3070   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3071   // Only perform this optimization up until type legalization, before
3072   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3073   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3074   // we don't want to undo this promotion.
3075   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3076   // on scalars.
3077   if ((N0.getOpcode() == ISD::BITCAST ||
3078        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3079        Level <= AfterLegalizeTypes) {
3080     SDValue In0 = N0.getOperand(0);
3081     SDValue In1 = N1.getOperand(0);
3082     EVT In0Ty = In0.getValueType();
3083     EVT In1Ty = In1.getValueType();
3084     SDLoc DL(N);
3085     // If both incoming values are integers, and the original types are the
3086     // same.
3087     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3088       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3089       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3090       AddToWorklist(Op.getNode());
3091       return BC;
3092     }
3093   }
3094 
3095   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3096   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3097   // If both shuffles use the same mask, and both shuffle within a single
3098   // vector, then it is worthwhile to move the swizzle after the operation.
3099   // The type-legalizer generates this pattern when loading illegal
3100   // vector types from memory. In many cases this allows additional shuffle
3101   // optimizations.
3102   // There are other cases where moving the shuffle after the xor/and/or
3103   // is profitable even if shuffles don't perform a swizzle.
3104   // If both shuffles use the same mask, and both shuffles have the same first
3105   // or second operand, then it might still be profitable to move the shuffle
3106   // after the xor/and/or operation.
3107   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3108     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3109     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3110 
3111     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3112            "Inputs to shuffles are not the same type");
3113 
3114     // Check that both shuffles use the same mask. The masks are known to be of
3115     // the same length because the result vector type is the same.
3116     // Check also that shuffles have only one use to avoid introducing extra
3117     // instructions.
3118     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3119         SVN0->getMask().equals(SVN1->getMask())) {
3120       SDValue ShOp = N0->getOperand(1);
3121 
3122       // Don't try to fold this node if it requires introducing a
3123       // build vector of all zeros that might be illegal at this stage.
3124       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3125         if (!LegalTypes)
3126           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3127         else
3128           ShOp = SDValue();
3129       }
3130 
3131       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3132       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
3133       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3134       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3135         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3136                                       N0->getOperand(0), N1->getOperand(0));
3137         AddToWorklist(NewNode.getNode());
3138         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3139                                     SVN0->getMask());
3140       }
3141 
3142       // Don't try to fold this node if it requires introducing a
3143       // build vector of all zeros that might be illegal at this stage.
3144       ShOp = N0->getOperand(0);
3145       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3146         if (!LegalTypes)
3147           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3148         else
3149           ShOp = SDValue();
3150       }
3151 
3152       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3153       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
3154       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3155       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3156         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3157                                       N0->getOperand(1), N1->getOperand(1));
3158         AddToWorklist(NewNode.getNode());
3159         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3160                                     SVN0->getMask());
3161       }
3162     }
3163   }
3164 
3165   return SDValue();
3166 }
3167 
3168 /// This contains all DAGCombine rules which reduce two values combined by
3169 /// an And operation to a single value. This makes them reusable in the context
3170 /// of visitSELECT(). Rules involving constants are not included as
3171 /// visitSELECT() already handles those cases.
3172 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
3173                                   SDNode *LocReference) {
3174   EVT VT = N1.getValueType();
3175 
3176   // fold (and x, undef) -> 0
3177   if (N0.isUndef() || N1.isUndef())
3178     return DAG.getConstant(0, SDLoc(LocReference), VT);
3179   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
3180   SDValue LL, LR, RL, RR, CC0, CC1;
3181   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3182     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3183     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3184 
3185     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
3186         LL.getValueType().isInteger()) {
3187       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
3188       if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
3189         EVT CCVT = getSetCCResultType(LR.getValueType());
3190         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3191           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
3192                                        LR.getValueType(), LL, RL);
3193           AddToWorklist(ORNode.getNode());
3194           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3195         }
3196       }
3197       if (isAllOnesConstant(LR)) {
3198         // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
3199         if (Op1 == ISD::SETEQ) {
3200           EVT CCVT = getSetCCResultType(LR.getValueType());
3201           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3202             SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
3203                                           LR.getValueType(), LL, RL);
3204             AddToWorklist(ANDNode.getNode());
3205             return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
3206           }
3207         }
3208         // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
3209         if (Op1 == ISD::SETGT) {
3210           EVT CCVT = getSetCCResultType(LR.getValueType());
3211           if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3212             SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
3213                                          LR.getValueType(), LL, RL);
3214             AddToWorklist(ORNode.getNode());
3215             return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3216           }
3217         }
3218       }
3219     }
3220     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
3221     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
3222         Op0 == Op1 && LL.getValueType().isInteger() &&
3223       Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3224                             (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3225       EVT CCVT = getSetCCResultType(LL.getValueType());
3226       if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3227         SDLoc DL(N0);
3228         SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
3229                                       LL, DAG.getConstant(1, DL,
3230                                                           LL.getValueType()));
3231         AddToWorklist(ADDNode.getNode());
3232         return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
3233                             DAG.getConstant(2, DL, LL.getValueType()),
3234                             ISD::SETUGE);
3235       }
3236     }
3237     // canonicalize equivalent to ll == rl
3238     if (LL == RR && LR == RL) {
3239       Op1 = ISD::getSetCCSwappedOperands(Op1);
3240       std::swap(RL, RR);
3241     }
3242     if (LL == RL && LR == RR) {
3243       bool isInteger = LL.getValueType().isInteger();
3244       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
3245       if (Result != ISD::SETCC_INVALID &&
3246           (!LegalOperations ||
3247            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3248             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
3249         EVT CCVT = getSetCCResultType(LL.getValueType());
3250         if (N0.getValueType() == CCVT ||
3251             (!LegalOperations && N0.getValueType() == MVT::i1))
3252           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3253                               LL, LR, Result);
3254       }
3255     }
3256   }
3257 
3258   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3259       VT.getSizeInBits() <= 64) {
3260     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3261       APInt ADDC = ADDI->getAPIntValue();
3262       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3263         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3264         // immediate for an add, but it is legal if its top c2 bits are set,
3265         // transform the ADD so the immediate doesn't need to be materialized
3266         // in a register.
3267         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3268           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3269                                              SRLI->getZExtValue());
3270           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3271             ADDC |= Mask;
3272             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3273               SDLoc DL(N0);
3274               SDValue NewAdd =
3275                 DAG.getNode(ISD::ADD, DL, VT,
3276                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3277               CombineTo(N0.getNode(), NewAdd);
3278               // Return N so it doesn't get rechecked!
3279               return SDValue(LocReference, 0);
3280             }
3281           }
3282         }
3283       }
3284     }
3285   }
3286 
3287   // Reduce bit extract of low half of an integer to the narrower type.
3288   // (and (srl i64:x, K), KMask) ->
3289   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3290   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3291     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3292       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3293         unsigned Size = VT.getSizeInBits();
3294         const APInt &AndMask = CAnd->getAPIntValue();
3295         unsigned ShiftBits = CShift->getZExtValue();
3296 
3297         // Bail out, this node will probably disappear anyway.
3298         if (ShiftBits == 0)
3299           return SDValue();
3300 
3301         unsigned MaskBits = AndMask.countTrailingOnes();
3302         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3303 
3304         if (APIntOps::isMask(AndMask) &&
3305             // Required bits must not span the two halves of the integer and
3306             // must fit in the half size type.
3307             (ShiftBits + MaskBits <= Size / 2) &&
3308             TLI.isNarrowingProfitable(VT, HalfVT) &&
3309             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3310             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3311             TLI.isTruncateFree(VT, HalfVT) &&
3312             TLI.isZExtFree(HalfVT, VT)) {
3313           // The isNarrowingProfitable is to avoid regressions on PPC and
3314           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3315           // on downstream users of this. Those patterns could probably be
3316           // extended to handle extensions mixed in.
3317 
3318           SDValue SL(N0);
3319           assert(MaskBits <= Size);
3320 
3321           // Extracting the highest bit of the low half.
3322           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3323           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3324                                       N0.getOperand(0));
3325 
3326           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3327           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3328           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3329           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3330           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3331         }
3332       }
3333     }
3334   }
3335 
3336   return SDValue();
3337 }
3338 
3339 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3340                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3341                                    bool &NarrowLoad) {
3342   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3343 
3344   if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
3345     return false;
3346 
3347   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3348   LoadedVT = LoadN->getMemoryVT();
3349 
3350   if (ExtVT == LoadedVT &&
3351       (!LegalOperations ||
3352        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3353     // ZEXTLOAD will match without needing to change the size of the value being
3354     // loaded.
3355     NarrowLoad = false;
3356     return true;
3357   }
3358 
3359   // Do not change the width of a volatile load.
3360   if (LoadN->isVolatile())
3361     return false;
3362 
3363   // Do not generate loads of non-round integer types since these can
3364   // be expensive (and would be wrong if the type is not byte sized).
3365   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3366     return false;
3367 
3368   if (LegalOperations &&
3369       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3370     return false;
3371 
3372   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3373     return false;
3374 
3375   NarrowLoad = true;
3376   return true;
3377 }
3378 
3379 SDValue DAGCombiner::visitAND(SDNode *N) {
3380   SDValue N0 = N->getOperand(0);
3381   SDValue N1 = N->getOperand(1);
3382   EVT VT = N1.getValueType();
3383 
3384   // x & x --> x
3385   if (N0 == N1)
3386     return N0;
3387 
3388   // fold vector ops
3389   if (VT.isVector()) {
3390     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3391       return FoldedVOp;
3392 
3393     // fold (and x, 0) -> 0, vector edition
3394     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3395       // do not return N0, because undef node may exist in N0
3396       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3397                              SDLoc(N), N0.getValueType());
3398     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3399       // do not return N1, because undef node may exist in N1
3400       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3401                              SDLoc(N), N1.getValueType());
3402 
3403     // fold (and x, -1) -> x, vector edition
3404     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3405       return N1;
3406     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3407       return N0;
3408   }
3409 
3410   // fold (and c1, c2) -> c1&c2
3411   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3412   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3413   if (N0C && N1C && !N1C->isOpaque())
3414     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3415   // canonicalize constant to RHS
3416   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3417      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3418     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3419   // fold (and x, -1) -> x
3420   if (isAllOnesConstant(N1))
3421     return N0;
3422   // if (and x, c) is known to be zero, return 0
3423   unsigned BitWidth = VT.getScalarSizeInBits();
3424   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3425                                    APInt::getAllOnesValue(BitWidth)))
3426     return DAG.getConstant(0, SDLoc(N), VT);
3427 
3428   if (SDValue NewSel = foldBinOpIntoSelect(N))
3429     return NewSel;
3430 
3431   // reassociate and
3432   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3433     return RAND;
3434   // fold (and (or x, C), D) -> D if (C & D) == D
3435   if (N1C && N0.getOpcode() == ISD::OR)
3436     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3437       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
3438         return N1;
3439   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3440   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3441     SDValue N0Op0 = N0.getOperand(0);
3442     APInt Mask = ~N1C->getAPIntValue();
3443     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3444     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3445       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3446                                  N0.getValueType(), N0Op0);
3447 
3448       // Replace uses of the AND with uses of the Zero extend node.
3449       CombineTo(N, Zext);
3450 
3451       // We actually want to replace all uses of the any_extend with the
3452       // zero_extend, to avoid duplicating things.  This will later cause this
3453       // AND to be folded.
3454       CombineTo(N0.getNode(), Zext);
3455       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3456     }
3457   }
3458   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3459   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3460   // already be zero by virtue of the width of the base type of the load.
3461   //
3462   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3463   // more cases.
3464   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3465        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3466        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3467        N0.getOperand(0).getResNo() == 0) ||
3468       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3469     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3470                                          N0 : N0.getOperand(0) );
3471 
3472     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3473     // This can be a pure constant or a vector splat, in which case we treat the
3474     // vector as a scalar and use the splat value.
3475     APInt Constant = APInt::getNullValue(1);
3476     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3477       Constant = C->getAPIntValue();
3478     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3479       APInt SplatValue, SplatUndef;
3480       unsigned SplatBitSize;
3481       bool HasAnyUndefs;
3482       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3483                                              SplatBitSize, HasAnyUndefs);
3484       if (IsSplat) {
3485         // Undef bits can contribute to a possible optimisation if set, so
3486         // set them.
3487         SplatValue |= SplatUndef;
3488 
3489         // The splat value may be something like "0x00FFFFFF", which means 0 for
3490         // the first vector value and FF for the rest, repeating. We need a mask
3491         // that will apply equally to all members of the vector, so AND all the
3492         // lanes of the constant together.
3493         EVT VT = Vector->getValueType(0);
3494         unsigned BitWidth = VT.getScalarSizeInBits();
3495 
3496         // If the splat value has been compressed to a bitlength lower
3497         // than the size of the vector lane, we need to re-expand it to
3498         // the lane size.
3499         if (BitWidth > SplatBitSize)
3500           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3501                SplatBitSize < BitWidth;
3502                SplatBitSize = SplatBitSize * 2)
3503             SplatValue |= SplatValue.shl(SplatBitSize);
3504 
3505         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3506         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3507         if (SplatBitSize % BitWidth == 0) {
3508           Constant = APInt::getAllOnesValue(BitWidth);
3509           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3510             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3511         }
3512       }
3513     }
3514 
3515     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3516     // actually legal and isn't going to get expanded, else this is a false
3517     // optimisation.
3518     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3519                                                     Load->getValueType(0),
3520                                                     Load->getMemoryVT());
3521 
3522     // Resize the constant to the same size as the original memory access before
3523     // extension. If it is still the AllOnesValue then this AND is completely
3524     // unneeded.
3525     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3526 
3527     bool B;
3528     switch (Load->getExtensionType()) {
3529     default: B = false; break;
3530     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3531     case ISD::ZEXTLOAD:
3532     case ISD::NON_EXTLOAD: B = true; break;
3533     }
3534 
3535     if (B && Constant.isAllOnesValue()) {
3536       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3537       // preserve semantics once we get rid of the AND.
3538       SDValue NewLoad(Load, 0);
3539       if (Load->getExtensionType() == ISD::EXTLOAD) {
3540         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3541                               Load->getValueType(0), SDLoc(Load),
3542                               Load->getChain(), Load->getBasePtr(),
3543                               Load->getOffset(), Load->getMemoryVT(),
3544                               Load->getMemOperand());
3545         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3546         if (Load->getNumValues() == 3) {
3547           // PRE/POST_INC loads have 3 values.
3548           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3549                            NewLoad.getValue(2) };
3550           CombineTo(Load, To, 3, true);
3551         } else {
3552           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3553         }
3554       }
3555 
3556       // Fold the AND away, taking care not to fold to the old load node if we
3557       // replaced it.
3558       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3559 
3560       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3561     }
3562   }
3563 
3564   // fold (and (load x), 255) -> (zextload x, i8)
3565   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3566   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3567   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3568                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3569                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3570     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3571     LoadSDNode *LN0 = HasAnyExt
3572       ? cast<LoadSDNode>(N0.getOperand(0))
3573       : cast<LoadSDNode>(N0);
3574     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3575         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3576       auto NarrowLoad = false;
3577       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3578       EVT ExtVT, LoadedVT;
3579       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3580                            NarrowLoad)) {
3581         if (!NarrowLoad) {
3582           SDValue NewLoad =
3583             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3584                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3585                            LN0->getMemOperand());
3586           AddToWorklist(N);
3587           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3588           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3589         } else {
3590           EVT PtrType = LN0->getOperand(1).getValueType();
3591 
3592           unsigned Alignment = LN0->getAlignment();
3593           SDValue NewPtr = LN0->getBasePtr();
3594 
3595           // For big endian targets, we need to add an offset to the pointer
3596           // to load the correct bytes.  For little endian systems, we merely
3597           // need to read fewer bytes from the same pointer.
3598           if (DAG.getDataLayout().isBigEndian()) {
3599             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3600             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3601             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3602             SDLoc DL(LN0);
3603             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3604                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3605             Alignment = MinAlign(Alignment, PtrOff);
3606           }
3607 
3608           AddToWorklist(NewPtr.getNode());
3609 
3610           SDValue Load = DAG.getExtLoad(
3611               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3612               LN0->getPointerInfo(), ExtVT, Alignment,
3613               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3614           AddToWorklist(N);
3615           CombineTo(LN0, Load, Load.getValue(1));
3616           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3617         }
3618       }
3619     }
3620   }
3621 
3622   if (SDValue Combined = visitANDLike(N0, N1, N))
3623     return Combined;
3624 
3625   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3626   if (N0.getOpcode() == N1.getOpcode())
3627     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3628       return Tmp;
3629 
3630   // Masking the negated extension of a boolean is just the zero-extended
3631   // boolean:
3632   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3633   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3634   //
3635   // Note: the SimplifyDemandedBits fold below can make an information-losing
3636   // transform, and then we have no way to find this better fold.
3637   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3638     ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3639     SDValue SubRHS = N0.getOperand(1);
3640     if (SubLHS && SubLHS->isNullValue()) {
3641       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3642           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3643         return SubRHS;
3644       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3645           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3646         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3647     }
3648   }
3649 
3650   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3651   // fold (and (sra)) -> (and (srl)) when possible.
3652   if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
3653     return SDValue(N, 0);
3654 
3655   // fold (zext_inreg (extload x)) -> (zextload x)
3656   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3657     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3658     EVT MemVT = LN0->getMemoryVT();
3659     // If we zero all the possible extended bits, then we can turn this into
3660     // a zextload if we are running before legalize or the operation is legal.
3661     unsigned BitWidth = N1.getScalarValueSizeInBits();
3662     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3663                            BitWidth - MemVT.getScalarSizeInBits())) &&
3664         ((!LegalOperations && !LN0->isVolatile()) ||
3665          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3666       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3667                                        LN0->getChain(), LN0->getBasePtr(),
3668                                        MemVT, LN0->getMemOperand());
3669       AddToWorklist(N);
3670       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3671       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3672     }
3673   }
3674   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3675   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3676       N0.hasOneUse()) {
3677     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3678     EVT MemVT = LN0->getMemoryVT();
3679     // If we zero all the possible extended bits, then we can turn this into
3680     // a zextload if we are running before legalize or the operation is legal.
3681     unsigned BitWidth = N1.getScalarValueSizeInBits();
3682     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3683                            BitWidth - MemVT.getScalarSizeInBits())) &&
3684         ((!LegalOperations && !LN0->isVolatile()) ||
3685          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3686       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3687                                        LN0->getChain(), LN0->getBasePtr(),
3688                                        MemVT, LN0->getMemOperand());
3689       AddToWorklist(N);
3690       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3691       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3692     }
3693   }
3694   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3695   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3696     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3697                                            N0.getOperand(1), false))
3698       return BSwap;
3699   }
3700 
3701   return SDValue();
3702 }
3703 
3704 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3705 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3706                                         bool DemandHighBits) {
3707   if (!LegalOperations)
3708     return SDValue();
3709 
3710   EVT VT = N->getValueType(0);
3711   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3712     return SDValue();
3713   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3714     return SDValue();
3715 
3716   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3717   bool LookPassAnd0 = false;
3718   bool LookPassAnd1 = false;
3719   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3720       std::swap(N0, N1);
3721   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3722       std::swap(N0, N1);
3723   if (N0.getOpcode() == ISD::AND) {
3724     if (!N0.getNode()->hasOneUse())
3725       return SDValue();
3726     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3727     if (!N01C || N01C->getZExtValue() != 0xFF00)
3728       return SDValue();
3729     N0 = N0.getOperand(0);
3730     LookPassAnd0 = true;
3731   }
3732 
3733   if (N1.getOpcode() == ISD::AND) {
3734     if (!N1.getNode()->hasOneUse())
3735       return SDValue();
3736     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3737     if (!N11C || N11C->getZExtValue() != 0xFF)
3738       return SDValue();
3739     N1 = N1.getOperand(0);
3740     LookPassAnd1 = true;
3741   }
3742 
3743   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3744     std::swap(N0, N1);
3745   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
3746     return SDValue();
3747   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
3748     return SDValue();
3749 
3750   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3751   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3752   if (!N01C || !N11C)
3753     return SDValue();
3754   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
3755     return SDValue();
3756 
3757   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
3758   SDValue N00 = N0->getOperand(0);
3759   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
3760     if (!N00.getNode()->hasOneUse())
3761       return SDValue();
3762     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
3763     if (!N001C || N001C->getZExtValue() != 0xFF)
3764       return SDValue();
3765     N00 = N00.getOperand(0);
3766     LookPassAnd0 = true;
3767   }
3768 
3769   SDValue N10 = N1->getOperand(0);
3770   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
3771     if (!N10.getNode()->hasOneUse())
3772       return SDValue();
3773     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
3774     if (!N101C || N101C->getZExtValue() != 0xFF00)
3775       return SDValue();
3776     N10 = N10.getOperand(0);
3777     LookPassAnd1 = true;
3778   }
3779 
3780   if (N00 != N10)
3781     return SDValue();
3782 
3783   // Make sure everything beyond the low halfword gets set to zero since the SRL
3784   // 16 will clear the top bits.
3785   unsigned OpSizeInBits = VT.getSizeInBits();
3786   if (DemandHighBits && OpSizeInBits > 16) {
3787     // If the left-shift isn't masked out then the only way this is a bswap is
3788     // if all bits beyond the low 8 are 0. In that case the entire pattern
3789     // reduces to a left shift anyway: leave it for other parts of the combiner.
3790     if (!LookPassAnd0)
3791       return SDValue();
3792 
3793     // However, if the right shift isn't masked out then it might be because
3794     // it's not needed. See if we can spot that too.
3795     if (!LookPassAnd1 &&
3796         !DAG.MaskedValueIsZero(
3797             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
3798       return SDValue();
3799   }
3800 
3801   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
3802   if (OpSizeInBits > 16) {
3803     SDLoc DL(N);
3804     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
3805                       DAG.getConstant(OpSizeInBits - 16, DL,
3806                                       getShiftAmountTy(VT)));
3807   }
3808   return Res;
3809 }
3810 
3811 /// Return true if the specified node is an element that makes up a 32-bit
3812 /// packed halfword byteswap.
3813 /// ((x & 0x000000ff) << 8) |
3814 /// ((x & 0x0000ff00) >> 8) |
3815 /// ((x & 0x00ff0000) << 8) |
3816 /// ((x & 0xff000000) >> 8)
3817 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3818   if (!N.getNode()->hasOneUse())
3819     return false;
3820 
3821   unsigned Opc = N.getOpcode();
3822   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3823     return false;
3824 
3825   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3826   if (!N1C)
3827     return false;
3828 
3829   unsigned Num;
3830   switch (N1C->getZExtValue()) {
3831   default:
3832     return false;
3833   case 0xFF:       Num = 0; break;
3834   case 0xFF00:     Num = 1; break;
3835   case 0xFF0000:   Num = 2; break;
3836   case 0xFF000000: Num = 3; break;
3837   }
3838 
3839   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3840   SDValue N0 = N.getOperand(0);
3841   if (Opc == ISD::AND) {
3842     if (Num == 0 || Num == 2) {
3843       // (x >> 8) & 0xff
3844       // (x >> 8) & 0xff0000
3845       if (N0.getOpcode() != ISD::SRL)
3846         return false;
3847       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3848       if (!C || C->getZExtValue() != 8)
3849         return false;
3850     } else {
3851       // (x << 8) & 0xff00
3852       // (x << 8) & 0xff000000
3853       if (N0.getOpcode() != ISD::SHL)
3854         return false;
3855       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3856       if (!C || C->getZExtValue() != 8)
3857         return false;
3858     }
3859   } else if (Opc == ISD::SHL) {
3860     // (x & 0xff) << 8
3861     // (x & 0xff0000) << 8
3862     if (Num != 0 && Num != 2)
3863       return false;
3864     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3865     if (!C || C->getZExtValue() != 8)
3866       return false;
3867   } else { // Opc == ISD::SRL
3868     // (x & 0xff00) >> 8
3869     // (x & 0xff000000) >> 8
3870     if (Num != 1 && Num != 3)
3871       return false;
3872     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3873     if (!C || C->getZExtValue() != 8)
3874       return false;
3875   }
3876 
3877   if (Parts[Num])
3878     return false;
3879 
3880   Parts[Num] = N0.getOperand(0).getNode();
3881   return true;
3882 }
3883 
3884 /// Match a 32-bit packed halfword bswap. That is
3885 /// ((x & 0x000000ff) << 8) |
3886 /// ((x & 0x0000ff00) >> 8) |
3887 /// ((x & 0x00ff0000) << 8) |
3888 /// ((x & 0xff000000) >> 8)
3889 /// => (rotl (bswap x), 16)
3890 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3891   if (!LegalOperations)
3892     return SDValue();
3893 
3894   EVT VT = N->getValueType(0);
3895   if (VT != MVT::i32)
3896     return SDValue();
3897   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3898     return SDValue();
3899 
3900   // Look for either
3901   // (or (or (and), (and)), (or (and), (and)))
3902   // (or (or (or (and), (and)), (and)), (and))
3903   if (N0.getOpcode() != ISD::OR)
3904     return SDValue();
3905   SDValue N00 = N0.getOperand(0);
3906   SDValue N01 = N0.getOperand(1);
3907   SDNode *Parts[4] = {};
3908 
3909   if (N1.getOpcode() == ISD::OR &&
3910       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3911     // (or (or (and), (and)), (or (and), (and)))
3912     SDValue N000 = N00.getOperand(0);
3913     if (!isBSwapHWordElement(N000, Parts))
3914       return SDValue();
3915 
3916     SDValue N001 = N00.getOperand(1);
3917     if (!isBSwapHWordElement(N001, Parts))
3918       return SDValue();
3919     SDValue N010 = N01.getOperand(0);
3920     if (!isBSwapHWordElement(N010, Parts))
3921       return SDValue();
3922     SDValue N011 = N01.getOperand(1);
3923     if (!isBSwapHWordElement(N011, Parts))
3924       return SDValue();
3925   } else {
3926     // (or (or (or (and), (and)), (and)), (and))
3927     if (!isBSwapHWordElement(N1, Parts))
3928       return SDValue();
3929     if (!isBSwapHWordElement(N01, Parts))
3930       return SDValue();
3931     if (N00.getOpcode() != ISD::OR)
3932       return SDValue();
3933     SDValue N000 = N00.getOperand(0);
3934     if (!isBSwapHWordElement(N000, Parts))
3935       return SDValue();
3936     SDValue N001 = N00.getOperand(1);
3937     if (!isBSwapHWordElement(N001, Parts))
3938       return SDValue();
3939   }
3940 
3941   // Make sure the parts are all coming from the same node.
3942   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3943     return SDValue();
3944 
3945   SDLoc DL(N);
3946   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
3947                               SDValue(Parts[0], 0));
3948 
3949   // Result of the bswap should be rotated by 16. If it's not legal, then
3950   // do  (x << 16) | (x >> 16).
3951   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
3952   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3953     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
3954   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3955     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
3956   return DAG.getNode(ISD::OR, DL, VT,
3957                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
3958                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
3959 }
3960 
3961 /// This contains all DAGCombine rules which reduce two values combined by
3962 /// an Or operation to a single value \see visitANDLike().
3963 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
3964   EVT VT = N1.getValueType();
3965   // fold (or x, undef) -> -1
3966   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
3967     return DAG.getAllOnesConstant(SDLoc(LocReference), VT);
3968 
3969   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3970   SDValue LL, LR, RL, RR, CC0, CC1;
3971   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3972     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3973     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3974 
3975     if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
3976       // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3977       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
3978       if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3979         EVT CCVT = getSetCCResultType(LR.getValueType());
3980         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3981           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3982                                        LR.getValueType(), LL, RL);
3983           AddToWorklist(ORNode.getNode());
3984           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3985         }
3986       }
3987       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3988       // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
3989       if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3990         EVT CCVT = getSetCCResultType(LR.getValueType());
3991         if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3992           SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3993                                         LR.getValueType(), LL, RL);
3994           AddToWorklist(ANDNode.getNode());
3995           return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
3996         }
3997       }
3998     }
3999     // canonicalize equivalent to ll == rl
4000     if (LL == RR && LR == RL) {
4001       Op1 = ISD::getSetCCSwappedOperands(Op1);
4002       std::swap(RL, RR);
4003     }
4004     if (LL == RL && LR == RR) {
4005       bool isInteger = LL.getValueType().isInteger();
4006       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
4007       if (Result != ISD::SETCC_INVALID &&
4008           (!LegalOperations ||
4009            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
4010             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
4011         EVT CCVT = getSetCCResultType(LL.getValueType());
4012         if (N0.getValueType() == CCVT ||
4013             (!LegalOperations && N0.getValueType() == MVT::i1))
4014           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
4015                               LL, LR, Result);
4016       }
4017     }
4018   }
4019 
4020   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4021   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4022       // Don't increase # computations.
4023       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4024     // We can only do this xform if we know that bits from X that are set in C2
4025     // but not in C1 are already zero.  Likewise for Y.
4026     if (const ConstantSDNode *N0O1C =
4027         getAsNonOpaqueConstant(N0.getOperand(1))) {
4028       if (const ConstantSDNode *N1O1C =
4029           getAsNonOpaqueConstant(N1.getOperand(1))) {
4030         // We can only do this xform if we know that bits from X that are set in
4031         // C2 but not in C1 are already zero.  Likewise for Y.
4032         const APInt &LHSMask = N0O1C->getAPIntValue();
4033         const APInt &RHSMask = N1O1C->getAPIntValue();
4034 
4035         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4036             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4037           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4038                                   N0.getOperand(0), N1.getOperand(0));
4039           SDLoc DL(LocReference);
4040           return DAG.getNode(ISD::AND, DL, VT, X,
4041                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4042         }
4043       }
4044     }
4045   }
4046 
4047   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4048   if (N0.getOpcode() == ISD::AND &&
4049       N1.getOpcode() == ISD::AND &&
4050       N0.getOperand(0) == N1.getOperand(0) &&
4051       // Don't increase # computations.
4052       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4053     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4054                             N0.getOperand(1), N1.getOperand(1));
4055     return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
4056   }
4057 
4058   return SDValue();
4059 }
4060 
4061 SDValue DAGCombiner::visitOR(SDNode *N) {
4062   SDValue N0 = N->getOperand(0);
4063   SDValue N1 = N->getOperand(1);
4064   EVT VT = N1.getValueType();
4065 
4066   // x | x --> x
4067   if (N0 == N1)
4068     return N0;
4069 
4070   // fold vector ops
4071   if (VT.isVector()) {
4072     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4073       return FoldedVOp;
4074 
4075     // fold (or x, 0) -> x, vector edition
4076     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4077       return N1;
4078     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4079       return N0;
4080 
4081     // fold (or x, -1) -> -1, vector edition
4082     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4083       // do not return N0, because undef node may exist in N0
4084       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4085     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4086       // do not return N1, because undef node may exist in N1
4087       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4088 
4089     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4090     // Do this only if the resulting shuffle is legal.
4091     if (isa<ShuffleVectorSDNode>(N0) &&
4092         isa<ShuffleVectorSDNode>(N1) &&
4093         // Avoid folding a node with illegal type.
4094         TLI.isTypeLegal(VT)) {
4095       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4096       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4097       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4098       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4099       // Ensure both shuffles have a zero input.
4100       if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
4101         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4102         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4103         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4104         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4105         bool CanFold = true;
4106         int NumElts = VT.getVectorNumElements();
4107         SmallVector<int, 4> Mask(NumElts);
4108 
4109         for (int i = 0; i != NumElts; ++i) {
4110           int M0 = SV0->getMaskElt(i);
4111           int M1 = SV1->getMaskElt(i);
4112 
4113           // Determine if either index is pointing to a zero vector.
4114           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4115           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4116 
4117           // If one element is zero and the otherside is undef, keep undef.
4118           // This also handles the case that both are undef.
4119           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4120             Mask[i] = -1;
4121             continue;
4122           }
4123 
4124           // Make sure only one of the elements is zero.
4125           if (M0Zero == M1Zero) {
4126             CanFold = false;
4127             break;
4128           }
4129 
4130           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4131 
4132           // We have a zero and non-zero element. If the non-zero came from
4133           // SV0 make the index a LHS index. If it came from SV1, make it
4134           // a RHS index. We need to mod by NumElts because we don't care
4135           // which operand it came from in the original shuffles.
4136           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4137         }
4138 
4139         if (CanFold) {
4140           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4141           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4142 
4143           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4144           if (!LegalMask) {
4145             std::swap(NewLHS, NewRHS);
4146             ShuffleVectorSDNode::commuteMask(Mask);
4147             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4148           }
4149 
4150           if (LegalMask)
4151             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4152         }
4153       }
4154     }
4155   }
4156 
4157   // fold (or c1, c2) -> c1|c2
4158   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4159   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4160   if (N0C && N1C && !N1C->isOpaque())
4161     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4162   // canonicalize constant to RHS
4163   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4164      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4165     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4166   // fold (or x, 0) -> x
4167   if (isNullConstant(N1))
4168     return N0;
4169   // fold (or x, -1) -> -1
4170   if (isAllOnesConstant(N1))
4171     return N1;
4172 
4173   if (SDValue NewSel = foldBinOpIntoSelect(N))
4174     return NewSel;
4175 
4176   // fold (or x, c) -> c iff (x & ~c) == 0
4177   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4178     return N1;
4179 
4180   if (SDValue Combined = visitORLike(N0, N1, N))
4181     return Combined;
4182 
4183   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4184   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4185     return BSwap;
4186   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4187     return BSwap;
4188 
4189   // reassociate or
4190   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4191     return ROR;
4192   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4193   // iff (c1 & c2) == 0.
4194   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4195              isa<ConstantSDNode>(N0.getOperand(1))) {
4196     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
4197     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
4198       if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
4199                                                    N1C, C1))
4200         return DAG.getNode(
4201             ISD::AND, SDLoc(N), VT,
4202             DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
4203       return SDValue();
4204     }
4205   }
4206   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
4207   if (N0.getOpcode() == N1.getOpcode())
4208     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4209       return Tmp;
4210 
4211   // See if this is some rotate idiom.
4212   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4213     return SDValue(Rot, 0);
4214 
4215   if (SDValue Load = MatchLoadCombine(N))
4216     return Load;
4217 
4218   // Simplify the operands using demanded-bits information.
4219   if (!VT.isVector() &&
4220       SimplifyDemandedBits(SDValue(N, 0)))
4221     return SDValue(N, 0);
4222 
4223   return SDValue();
4224 }
4225 
4226 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4227 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4228   if (Op.getOpcode() == ISD::AND) {
4229     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4230       Mask = Op.getOperand(1);
4231       Op = Op.getOperand(0);
4232     } else {
4233       return false;
4234     }
4235   }
4236 
4237   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4238     Shift = Op;
4239     return true;
4240   }
4241 
4242   return false;
4243 }
4244 
4245 // Return true if we can prove that, whenever Neg and Pos are both in the
4246 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
4247 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4248 //
4249 //     (or (shift1 X, Neg), (shift2 X, Pos))
4250 //
4251 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4252 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
4253 // to consider shift amounts with defined behavior.
4254 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4255   // If EltSize is a power of 2 then:
4256   //
4257   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4258   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4259   //
4260   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4261   // for the stronger condition:
4262   //
4263   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4264   //
4265   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4266   // we can just replace Neg with Neg' for the rest of the function.
4267   //
4268   // In other cases we check for the even stronger condition:
4269   //
4270   //     Neg == EltSize - Pos                                    [B]
4271   //
4272   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4273   // behavior if Pos == 0 (and consequently Neg == EltSize).
4274   //
4275   // We could actually use [A] whenever EltSize is a power of 2, but the
4276   // only extra cases that it would match are those uninteresting ones
4277   // where Neg and Pos are never in range at the same time.  E.g. for
4278   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4279   // as well as (sub 32, Pos), but:
4280   //
4281   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4282   //
4283   // always invokes undefined behavior for 32-bit X.
4284   //
4285   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4286   unsigned MaskLoBits = 0;
4287   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4288     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4289       if (NegC->getAPIntValue() == EltSize - 1) {
4290         Neg = Neg.getOperand(0);
4291         MaskLoBits = Log2_64(EltSize);
4292       }
4293     }
4294   }
4295 
4296   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4297   if (Neg.getOpcode() != ISD::SUB)
4298     return false;
4299   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4300   if (!NegC)
4301     return false;
4302   SDValue NegOp1 = Neg.getOperand(1);
4303 
4304   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4305   // Pos'.  The truncation is redundant for the purpose of the equality.
4306   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4307     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4308       if (PosC->getAPIntValue() == EltSize - 1)
4309         Pos = Pos.getOperand(0);
4310 
4311   // The condition we need is now:
4312   //
4313   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4314   //
4315   // If NegOp1 == Pos then we need:
4316   //
4317   //              EltSize & Mask == NegC & Mask
4318   //
4319   // (because "x & Mask" is a truncation and distributes through subtraction).
4320   APInt Width;
4321   if (Pos == NegOp1)
4322     Width = NegC->getAPIntValue();
4323 
4324   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4325   // Then the condition we want to prove becomes:
4326   //
4327   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4328   //
4329   // which, again because "x & Mask" is a truncation, becomes:
4330   //
4331   //                NegC & Mask == (EltSize - PosC) & Mask
4332   //             EltSize & Mask == (NegC + PosC) & Mask
4333   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4334     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4335       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4336     else
4337       return false;
4338   } else
4339     return false;
4340 
4341   // Now we just need to check that EltSize & Mask == Width & Mask.
4342   if (MaskLoBits)
4343     // EltSize & Mask is 0 since Mask is EltSize - 1.
4344     return Width.getLoBits(MaskLoBits) == 0;
4345   return Width == EltSize;
4346 }
4347 
4348 // A subroutine of MatchRotate used once we have found an OR of two opposite
4349 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4350 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4351 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4352 // Neg with outer conversions stripped away.
4353 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4354                                        SDValue Neg, SDValue InnerPos,
4355                                        SDValue InnerNeg, unsigned PosOpcode,
4356                                        unsigned NegOpcode, const SDLoc &DL) {
4357   // fold (or (shl x, (*ext y)),
4358   //          (srl x, (*ext (sub 32, y)))) ->
4359   //   (rotl x, y) or (rotr x, (sub 32, y))
4360   //
4361   // fold (or (shl x, (*ext (sub 32, y))),
4362   //          (srl x, (*ext y))) ->
4363   //   (rotr x, y) or (rotl x, (sub 32, y))
4364   EVT VT = Shifted.getValueType();
4365   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4366     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4367     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4368                        HasPos ? Pos : Neg).getNode();
4369   }
4370 
4371   return nullptr;
4372 }
4373 
4374 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4375 // idioms for rotate, and if the target supports rotation instructions, generate
4376 // a rot[lr].
4377 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4378   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4379   EVT VT = LHS.getValueType();
4380   if (!TLI.isTypeLegal(VT)) return nullptr;
4381 
4382   // The target must have at least one rotate flavor.
4383   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4384   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4385   if (!HasROTL && !HasROTR) return nullptr;
4386 
4387   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4388   SDValue LHSShift;   // The shift.
4389   SDValue LHSMask;    // AND value if any.
4390   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4391     return nullptr; // Not part of a rotate.
4392 
4393   SDValue RHSShift;   // The shift.
4394   SDValue RHSMask;    // AND value if any.
4395   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4396     return nullptr; // Not part of a rotate.
4397 
4398   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4399     return nullptr;   // Not shifting the same value.
4400 
4401   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4402     return nullptr;   // Shifts must disagree.
4403 
4404   // Canonicalize shl to left side in a shl/srl pair.
4405   if (RHSShift.getOpcode() == ISD::SHL) {
4406     std::swap(LHS, RHS);
4407     std::swap(LHSShift, RHSShift);
4408     std::swap(LHSMask, RHSMask);
4409   }
4410 
4411   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4412   SDValue LHSShiftArg = LHSShift.getOperand(0);
4413   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4414   SDValue RHSShiftArg = RHSShift.getOperand(0);
4415   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4416 
4417   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4418   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4419   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
4420     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
4421     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
4422     if ((LShVal + RShVal) != EltSizeInBits)
4423       return nullptr;
4424 
4425     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4426                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4427 
4428     // If there is an AND of either shifted operand, apply it to the result.
4429     if (LHSMask.getNode() || RHSMask.getNode()) {
4430       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
4431 
4432       if (LHSMask.getNode()) {
4433         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
4434         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4435                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
4436                                        DAG.getConstant(RHSBits, DL, VT)));
4437       }
4438       if (RHSMask.getNode()) {
4439         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
4440         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4441                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
4442                                        DAG.getConstant(LHSBits, DL, VT)));
4443       }
4444 
4445       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4446     }
4447 
4448     return Rot.getNode();
4449   }
4450 
4451   // If there is a mask here, and we have a variable shift, we can't be sure
4452   // that we're masking out the right stuff.
4453   if (LHSMask.getNode() || RHSMask.getNode())
4454     return nullptr;
4455 
4456   // If the shift amount is sign/zext/any-extended just peel it off.
4457   SDValue LExtOp0 = LHSShiftAmt;
4458   SDValue RExtOp0 = RHSShiftAmt;
4459   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4460        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4461        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4462        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4463       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4464        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4465        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4466        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4467     LExtOp0 = LHSShiftAmt.getOperand(0);
4468     RExtOp0 = RHSShiftAmt.getOperand(0);
4469   }
4470 
4471   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4472                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4473   if (TryL)
4474     return TryL;
4475 
4476   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4477                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4478   if (TryR)
4479     return TryR;
4480 
4481   return nullptr;
4482 }
4483 
4484 namespace {
4485 /// Helper struct to parse and store a memory address as base + index + offset.
4486 /// We ignore sign extensions when it is safe to do so.
4487 /// The following two expressions are not equivalent. To differentiate we need
4488 /// to store whether there was a sign extension involved in the index
4489 /// computation.
4490 ///  (load (i64 add (i64 copyfromreg %c)
4491 ///                 (i64 signextend (add (i8 load %index)
4492 ///                                      (i8 1))))
4493 /// vs
4494 ///
4495 /// (load (i64 add (i64 copyfromreg %c)
4496 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
4497 ///                                         (i32 1)))))
4498 struct BaseIndexOffset {
4499   SDValue Base;
4500   SDValue Index;
4501   int64_t Offset;
4502   bool IsIndexSignExt;
4503 
4504   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
4505 
4506   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
4507                   bool IsIndexSignExt) :
4508     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
4509 
4510   bool equalBaseIndex(const BaseIndexOffset &Other) {
4511     return Other.Base == Base && Other.Index == Index &&
4512       Other.IsIndexSignExt == IsIndexSignExt;
4513   }
4514 
4515   /// Parses tree in Ptr for base, index, offset addresses.
4516   static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
4517                                int64_t PartialOffset = 0) {
4518     bool IsIndexSignExt = false;
4519 
4520     // Split up a folded GlobalAddress+Offset into its component parts.
4521     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
4522       if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
4523         return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
4524                                                     SDLoc(GA),
4525                                                     GA->getValueType(0),
4526                                                     /*Offset=*/PartialOffset,
4527                                                     /*isTargetGA=*/false,
4528                                                     GA->getTargetFlags()),
4529                                SDValue(),
4530                                GA->getOffset(),
4531                                IsIndexSignExt);
4532       }
4533 
4534     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
4535     // instruction, then it could be just the BASE or everything else we don't
4536     // know how to handle. Just use Ptr as BASE and give up.
4537     if (Ptr->getOpcode() != ISD::ADD)
4538       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4539 
4540     // We know that we have at least an ADD instruction. Try to pattern match
4541     // the simple case of BASE + OFFSET.
4542     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
4543       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
4544       return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
4545     }
4546 
4547     // Inside a loop the current BASE pointer is calculated using an ADD and a
4548     // MUL instruction. In this case Ptr is the actual BASE pointer.
4549     // (i64 add (i64 %array_ptr)
4550     //          (i64 mul (i64 %induction_var)
4551     //                   (i64 %element_size)))
4552     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
4553       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4554 
4555     // Look at Base + Index + Offset cases.
4556     SDValue Base = Ptr->getOperand(0);
4557     SDValue IndexOffset = Ptr->getOperand(1);
4558 
4559     // Skip signextends.
4560     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
4561       IndexOffset = IndexOffset->getOperand(0);
4562       IsIndexSignExt = true;
4563     }
4564 
4565     // Either the case of Base + Index (no offset) or something else.
4566     if (IndexOffset->getOpcode() != ISD::ADD)
4567       return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
4568 
4569     // Now we have the case of Base + Index + offset.
4570     SDValue Index = IndexOffset->getOperand(0);
4571     SDValue Offset = IndexOffset->getOperand(1);
4572 
4573     if (!isa<ConstantSDNode>(Offset))
4574       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4575 
4576     // Ignore signextends.
4577     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
4578       Index = Index->getOperand(0);
4579       IsIndexSignExt = true;
4580     } else IsIndexSignExt = false;
4581 
4582     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
4583     return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
4584   }
4585 };
4586 } // namespace
4587 
4588 namespace {
4589 /// Represents known origin of an individual byte in load combine pattern. The
4590 /// value of the byte is either constant zero or comes from memory.
4591 struct ByteProvider {
4592   // For constant zero providers Load is set to nullptr. For memory providers
4593   // Load represents the node which loads the byte from memory.
4594   // ByteOffset is the offset of the byte in the value produced by the load.
4595   LoadSDNode *Load;
4596   unsigned ByteOffset;
4597 
4598   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4599 
4600   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4601     return ByteProvider(Load, ByteOffset);
4602   }
4603   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4604 
4605   bool isConstantZero() const { return !Load; }
4606   bool isMemory() const { return Load; }
4607 
4608   bool operator==(const ByteProvider &Other) const {
4609     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4610   }
4611 
4612 private:
4613   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4614       : Load(Load), ByteOffset(ByteOffset) {}
4615 };
4616 
4617 /// Recursively traverses the expression calculating the origin of the requested
4618 /// byte of the given value. Returns None if the provider can't be calculated.
4619 ///
4620 /// For all the values except the root of the expression verifies that the value
4621 /// has exactly one use and if it's not true return None. This way if the origin
4622 /// of the byte is returned it's guaranteed that the values which contribute to
4623 /// the byte are not used outside of this expression.
4624 ///
4625 /// Because the parts of the expression are not allowed to have more than one
4626 /// use this function iterates over trees, not DAGs. So it never visits the same
4627 /// node more than once.
4628 const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
4629                                                    unsigned Depth,
4630                                                    bool Root = false) {
4631   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
4632   if (Depth == 10)
4633     return None;
4634 
4635   if (!Root && !Op.hasOneUse())
4636     return None;
4637 
4638   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
4639   unsigned BitWidth = Op.getValueSizeInBits();
4640   if (BitWidth % 8 != 0)
4641     return None;
4642   unsigned ByteWidth = BitWidth / 8;
4643   assert(Index < ByteWidth && "invalid index requested");
4644   (void) ByteWidth;
4645 
4646   switch (Op.getOpcode()) {
4647   case ISD::OR: {
4648     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
4649     if (!LHS)
4650       return None;
4651     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
4652     if (!RHS)
4653       return None;
4654 
4655     if (LHS->isConstantZero())
4656       return RHS;
4657     else if (RHS->isConstantZero())
4658       return LHS;
4659     else
4660       return None;
4661   }
4662   case ISD::SHL: {
4663     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
4664     if (!ShiftOp)
4665       return None;
4666 
4667     uint64_t BitShift = ShiftOp->getZExtValue();
4668     if (BitShift % 8 != 0)
4669       return None;
4670     uint64_t ByteShift = BitShift / 8;
4671 
4672     return Index < ByteShift
4673                ? ByteProvider::getConstantZero()
4674                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
4675                                        Depth + 1);
4676   }
4677   case ISD::ANY_EXTEND:
4678   case ISD::SIGN_EXTEND:
4679   case ISD::ZERO_EXTEND: {
4680     SDValue NarrowOp = Op->getOperand(0);
4681     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
4682     if (NarrowBitWidth % 8 != 0)
4683       return None;
4684     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4685 
4686     if (Index >= NarrowByteWidth)
4687       return Op.getOpcode() == ISD::ZERO_EXTEND
4688                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4689                  : None;
4690     else
4691       return calculateByteProvider(NarrowOp, Index, Depth + 1);
4692   }
4693   case ISD::BSWAP:
4694     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
4695                                  Depth + 1);
4696   case ISD::LOAD: {
4697     auto L = cast<LoadSDNode>(Op.getNode());
4698     if (L->isVolatile() || L->isIndexed())
4699       return None;
4700 
4701     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
4702     if (NarrowBitWidth % 8 != 0)
4703       return None;
4704     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4705 
4706     if (Index >= NarrowByteWidth)
4707       return L->getExtensionType() == ISD::ZEXTLOAD
4708                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4709                  : None;
4710     else
4711       return ByteProvider::getMemory(L, Index);
4712   }
4713   }
4714 
4715   return None;
4716 }
4717 } // namespace
4718 
4719 /// Match a pattern where a wide type scalar value is loaded by several narrow
4720 /// loads and combined by shifts and ors. Fold it into a single load or a load
4721 /// and a BSWAP if the targets supports it.
4722 ///
4723 /// Assuming little endian target:
4724 ///  i8 *a = ...
4725 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4726 /// =>
4727 ///  i32 val = *((i32)a)
4728 ///
4729 ///  i8 *a = ...
4730 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4731 /// =>
4732 ///  i32 val = BSWAP(*((i32)a))
4733 ///
4734 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4735 /// interact well with the worklist mechanism. When a part of the pattern is
4736 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4737 /// but the root node of the pattern which triggers the load combine is not
4738 /// necessarily a direct user of the changed node. For example, once the address
4739 /// of t28 load is reassociated load combine won't be triggered:
4740 ///             t25: i32 = add t4, Constant:i32<2>
4741 ///           t26: i64 = sign_extend t25
4742 ///        t27: i64 = add t2, t26
4743 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4744 ///     t29: i32 = zero_extend t28
4745 ///   t32: i32 = shl t29, Constant:i8<8>
4746 /// t33: i32 = or t23, t32
4747 /// As a possible fix visitLoad can check if the load can be a part of a load
4748 /// combine pattern and add corresponding OR roots to the worklist.
4749 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4750   assert(N->getOpcode() == ISD::OR &&
4751          "Can only match load combining against OR nodes");
4752 
4753   // Handles simple types only
4754   EVT VT = N->getValueType(0);
4755   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4756     return SDValue();
4757   unsigned ByteWidth = VT.getSizeInBits() / 8;
4758 
4759   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4760   // Before legalize we can introduce too wide illegal loads which will be later
4761   // split into legal sized loads. This enables us to combine i64 load by i8
4762   // patterns to a couple of i32 loads on 32 bit targets.
4763   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4764     return SDValue();
4765 
4766   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4767     unsigned BW, unsigned i) { return i; };
4768   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4769     unsigned BW, unsigned i) { return BW - i - 1; };
4770 
4771   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4772   auto MemoryByteOffset = [&] (ByteProvider P) {
4773     assert(P.isMemory() && "Must be a memory byte provider");
4774     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4775     assert(LoadBitWidth % 8 == 0 &&
4776            "can only analyze providers for individual bytes not bit");
4777     unsigned LoadByteWidth = LoadBitWidth / 8;
4778     return IsBigEndianTarget
4779             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4780             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4781   };
4782 
4783   Optional<BaseIndexOffset> Base;
4784   SDValue Chain;
4785 
4786   SmallSet<LoadSDNode *, 8> Loads;
4787   Optional<ByteProvider> FirstByteProvider;
4788   int64_t FirstOffset = INT64_MAX;
4789 
4790   // Check if all the bytes of the OR we are looking at are loaded from the same
4791   // base address. Collect bytes offsets from Base address in ByteOffsets.
4792   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
4793   for (unsigned i = 0; i < ByteWidth; i++) {
4794     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
4795     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
4796       return SDValue();
4797 
4798     LoadSDNode *L = P->Load;
4799     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
4800            "Must be enforced by calculateByteProvider");
4801     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
4802 
4803     // All loads must share the same chain
4804     SDValue LChain = L->getChain();
4805     if (!Chain)
4806       Chain = LChain;
4807     else if (Chain != LChain)
4808       return SDValue();
4809 
4810     // Loads must share the same base address
4811     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
4812     if (!Base)
4813       Base = Ptr;
4814     else if (!Base->equalBaseIndex(Ptr))
4815       return SDValue();
4816 
4817     // Calculate the offset of the current byte from the base address
4818     int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
4819     ByteOffsets[i] = ByteOffsetFromBase;
4820 
4821     // Remember the first byte load
4822     if (ByteOffsetFromBase < FirstOffset) {
4823       FirstByteProvider = P;
4824       FirstOffset = ByteOffsetFromBase;
4825     }
4826 
4827     Loads.insert(L);
4828   }
4829   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
4830          "memory, so there must be at least one load which produces the value");
4831   assert(Base && "Base address of the accessed memory location must be set");
4832   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
4833 
4834   // Check if the bytes of the OR we are looking at match with either big or
4835   // little endian value load
4836   bool BigEndian = true, LittleEndian = true;
4837   for (unsigned i = 0; i < ByteWidth; i++) {
4838     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
4839     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
4840     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
4841     if (!BigEndian && !LittleEndian)
4842       return SDValue();
4843   }
4844   assert((BigEndian != LittleEndian) && "should be either or");
4845   assert(FirstByteProvider && "must be set");
4846 
4847   // Ensure that the first byte is loaded from zero offset of the first load.
4848   // So the combined value can be loaded from the first load address.
4849   if (MemoryByteOffset(*FirstByteProvider) != 0)
4850     return SDValue();
4851   LoadSDNode *FirstLoad = FirstByteProvider->Load;
4852 
4853   // The node we are looking at matches with the pattern, check if we can
4854   // replace it with a single load and bswap if needed.
4855 
4856   // If the load needs byte swap check if the target supports it
4857   bool NeedsBswap = IsBigEndianTarget != BigEndian;
4858 
4859   // Before legalize we can introduce illegal bswaps which will be later
4860   // converted to an explicit bswap sequence. This way we end up with a single
4861   // load and byte shuffling instead of several loads and byte shuffling.
4862   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
4863     return SDValue();
4864 
4865   // Check that a load of the wide type is both allowed and fast on the target
4866   bool Fast = false;
4867   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
4868                                         VT, FirstLoad->getAddressSpace(),
4869                                         FirstLoad->getAlignment(), &Fast);
4870   if (!Allowed || !Fast)
4871     return SDValue();
4872 
4873   SDValue NewLoad =
4874       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
4875                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
4876 
4877   // Transfer chain users from old loads to the new load.
4878   for (LoadSDNode *L : Loads)
4879     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
4880 
4881   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
4882 }
4883 
4884 SDValue DAGCombiner::visitXOR(SDNode *N) {
4885   SDValue N0 = N->getOperand(0);
4886   SDValue N1 = N->getOperand(1);
4887   EVT VT = N0.getValueType();
4888 
4889   // fold vector ops
4890   if (VT.isVector()) {
4891     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4892       return FoldedVOp;
4893 
4894     // fold (xor x, 0) -> x, vector edition
4895     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4896       return N1;
4897     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4898       return N0;
4899   }
4900 
4901   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
4902   if (N0.isUndef() && N1.isUndef())
4903     return DAG.getConstant(0, SDLoc(N), VT);
4904   // fold (xor x, undef) -> undef
4905   if (N0.isUndef())
4906     return N0;
4907   if (N1.isUndef())
4908     return N1;
4909   // fold (xor c1, c2) -> c1^c2
4910   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4911   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4912   if (N0C && N1C)
4913     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
4914   // canonicalize constant to RHS
4915   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4916      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4917     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
4918   // fold (xor x, 0) -> x
4919   if (isNullConstant(N1))
4920     return N0;
4921 
4922   if (SDValue NewSel = foldBinOpIntoSelect(N))
4923     return NewSel;
4924 
4925   // reassociate xor
4926   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
4927     return RXOR;
4928 
4929   // fold !(x cc y) -> (x !cc y)
4930   SDValue LHS, RHS, CC;
4931   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
4932     bool isInt = LHS.getValueType().isInteger();
4933     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
4934                                                isInt);
4935 
4936     if (!LegalOperations ||
4937         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
4938       switch (N0.getOpcode()) {
4939       default:
4940         llvm_unreachable("Unhandled SetCC Equivalent!");
4941       case ISD::SETCC:
4942         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
4943       case ISD::SELECT_CC:
4944         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
4945                                N0.getOperand(3), NotCC);
4946       }
4947     }
4948   }
4949 
4950   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
4951   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
4952       N0.getNode()->hasOneUse() &&
4953       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
4954     SDValue V = N0.getOperand(0);
4955     SDLoc DL(N0);
4956     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
4957                     DAG.getConstant(1, DL, V.getValueType()));
4958     AddToWorklist(V.getNode());
4959     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
4960   }
4961 
4962   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
4963   if (isOneConstant(N1) && VT == MVT::i1 &&
4964       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4965     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4966     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
4967       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4968       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4969       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4970       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4971       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4972     }
4973   }
4974   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
4975   if (isAllOnesConstant(N1) &&
4976       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4977     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4978     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
4979       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4980       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4981       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4982       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4983       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4984     }
4985   }
4986   // fold (xor (and x, y), y) -> (and (not x), y)
4987   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4988       N0->getOperand(1) == N1) {
4989     SDValue X = N0->getOperand(0);
4990     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
4991     AddToWorklist(NotX.getNode());
4992     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
4993   }
4994   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
4995   if (N1C && N0.getOpcode() == ISD::XOR) {
4996     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
4997       SDLoc DL(N);
4998       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4999                          DAG.getConstant(N1C->getAPIntValue() ^
5000                                          N00C->getAPIntValue(), DL, VT));
5001     }
5002     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
5003       SDLoc DL(N);
5004       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
5005                          DAG.getConstant(N1C->getAPIntValue() ^
5006                                          N01C->getAPIntValue(), DL, VT));
5007     }
5008   }
5009 
5010   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5011   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5012   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5013       N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5014       TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5015     if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5016       if (C->getAPIntValue() == (OpSizeInBits - 1))
5017         return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5018   }
5019 
5020   // fold (xor x, x) -> 0
5021   if (N0 == N1)
5022     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5023 
5024   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5025   // Here is a concrete example of this equivalence:
5026   // i16   x ==  14
5027   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
5028   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5029   //
5030   // =>
5031   //
5032   // i16     ~1      == 0b1111111111111110
5033   // i16 rol(~1, 14) == 0b1011111111111111
5034   //
5035   // Some additional tips to help conceptualize this transform:
5036   // - Try to see the operation as placing a single zero in a value of all ones.
5037   // - There exists no value for x which would allow the result to contain zero.
5038   // - Values of x larger than the bitwidth are undefined and do not require a
5039   //   consistent result.
5040   // - Pushing the zero left requires shifting one bits in from the right.
5041   // A rotate left of ~1 is a nice way of achieving the desired result.
5042   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5043       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5044     SDLoc DL(N);
5045     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5046                        N0.getOperand(1));
5047   }
5048 
5049   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
5050   if (N0.getOpcode() == N1.getOpcode())
5051     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5052       return Tmp;
5053 
5054   // Simplify the expression using non-local knowledge.
5055   if (!VT.isVector() &&
5056       SimplifyDemandedBits(SDValue(N, 0)))
5057     return SDValue(N, 0);
5058 
5059   return SDValue();
5060 }
5061 
5062 /// Handle transforms common to the three shifts, when the shift amount is a
5063 /// constant.
5064 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5065   SDNode *LHS = N->getOperand(0).getNode();
5066   if (!LHS->hasOneUse()) return SDValue();
5067 
5068   // We want to pull some binops through shifts, so that we have (and (shift))
5069   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5070   // thing happens with address calculations, so it's important to canonicalize
5071   // it.
5072   bool HighBitSet = false;  // Can we transform this if the high bit is set?
5073 
5074   switch (LHS->getOpcode()) {
5075   default: return SDValue();
5076   case ISD::OR:
5077   case ISD::XOR:
5078     HighBitSet = false; // We can only transform sra if the high bit is clear.
5079     break;
5080   case ISD::AND:
5081     HighBitSet = true;  // We can only transform sra if the high bit is set.
5082     break;
5083   case ISD::ADD:
5084     if (N->getOpcode() != ISD::SHL)
5085       return SDValue(); // only shl(add) not sr[al](add).
5086     HighBitSet = false; // We can only transform sra if the high bit is clear.
5087     break;
5088   }
5089 
5090   // We require the RHS of the binop to be a constant and not opaque as well.
5091   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5092   if (!BinOpCst) return SDValue();
5093 
5094   // FIXME: disable this unless the input to the binop is a shift by a constant
5095   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5096   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5097   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5098                  BinOpLHSVal->getOpcode() == ISD::SRA ||
5099                  BinOpLHSVal->getOpcode() == ISD::SRL;
5100   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5101                         BinOpLHSVal->getOpcode() == ISD::SELECT;
5102 
5103   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5104       !isCopyOrSelect)
5105     return SDValue();
5106 
5107   if (isCopyOrSelect && N->hasOneUse())
5108     return SDValue();
5109 
5110   EVT VT = N->getValueType(0);
5111 
5112   // If this is a signed shift right, and the high bit is modified by the
5113   // logical operation, do not perform the transformation. The highBitSet
5114   // boolean indicates the value of the high bit of the constant which would
5115   // cause it to be modified for this operation.
5116   if (N->getOpcode() == ISD::SRA) {
5117     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5118     if (BinOpRHSSignSet != HighBitSet)
5119       return SDValue();
5120   }
5121 
5122   if (!TLI.isDesirableToCommuteWithShift(LHS))
5123     return SDValue();
5124 
5125   // Fold the constants, shifting the binop RHS by the shift amount.
5126   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5127                                N->getValueType(0),
5128                                LHS->getOperand(1), N->getOperand(1));
5129   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5130 
5131   // Create the new shift.
5132   SDValue NewShift = DAG.getNode(N->getOpcode(),
5133                                  SDLoc(LHS->getOperand(0)),
5134                                  VT, LHS->getOperand(0), N->getOperand(1));
5135 
5136   // Create the new binop.
5137   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5138 }
5139 
5140 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5141   assert(N->getOpcode() == ISD::TRUNCATE);
5142   assert(N->getOperand(0).getOpcode() == ISD::AND);
5143 
5144   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5145   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5146     SDValue N01 = N->getOperand(0).getOperand(1);
5147     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5148       SDLoc DL(N);
5149       EVT TruncVT = N->getValueType(0);
5150       SDValue N00 = N->getOperand(0).getOperand(0);
5151       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5152       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5153       AddToWorklist(Trunc00.getNode());
5154       AddToWorklist(Trunc01.getNode());
5155       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5156     }
5157   }
5158 
5159   return SDValue();
5160 }
5161 
5162 SDValue DAGCombiner::visitRotate(SDNode *N) {
5163   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5164   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
5165       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
5166     if (SDValue NewOp1 =
5167             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
5168       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
5169                          N->getOperand(0), NewOp1);
5170   }
5171   return SDValue();
5172 }
5173 
5174 SDValue DAGCombiner::visitSHL(SDNode *N) {
5175   SDValue N0 = N->getOperand(0);
5176   SDValue N1 = N->getOperand(1);
5177   EVT VT = N0.getValueType();
5178   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5179 
5180   // fold vector ops
5181   if (VT.isVector()) {
5182     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5183       return FoldedVOp;
5184 
5185     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5186     // If setcc produces all-one true value then:
5187     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5188     if (N1CV && N1CV->isConstant()) {
5189       if (N0.getOpcode() == ISD::AND) {
5190         SDValue N00 = N0->getOperand(0);
5191         SDValue N01 = N0->getOperand(1);
5192         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5193 
5194         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5195             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5196                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
5197           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5198                                                      N01CV, N1CV))
5199             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5200         }
5201       }
5202     }
5203   }
5204 
5205   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5206 
5207   // fold (shl c1, c2) -> c1<<c2
5208   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5209   if (N0C && N1C && !N1C->isOpaque())
5210     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5211   // fold (shl 0, x) -> 0
5212   if (isNullConstant(N0))
5213     return N0;
5214   // fold (shl x, c >= size(x)) -> undef
5215   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5216     return DAG.getUNDEF(VT);
5217   // fold (shl x, 0) -> x
5218   if (N1C && N1C->isNullValue())
5219     return N0;
5220   // fold (shl undef, x) -> 0
5221   if (N0.isUndef())
5222     return DAG.getConstant(0, SDLoc(N), VT);
5223 
5224   if (SDValue NewSel = foldBinOpIntoSelect(N))
5225     return NewSel;
5226 
5227   // if (shl x, c) is known to be zero, return 0
5228   if (DAG.MaskedValueIsZero(SDValue(N, 0),
5229                             APInt::getAllOnesValue(OpSizeInBits)))
5230     return DAG.getConstant(0, SDLoc(N), VT);
5231   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5232   if (N1.getOpcode() == ISD::TRUNCATE &&
5233       N1.getOperand(0).getOpcode() == ISD::AND) {
5234     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5235       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5236   }
5237 
5238   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5239     return SDValue(N, 0);
5240 
5241   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5242   if (N1C && N0.getOpcode() == ISD::SHL) {
5243     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5244       SDLoc DL(N);
5245       APInt c1 = N0C1->getAPIntValue();
5246       APInt c2 = N1C->getAPIntValue();
5247       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5248 
5249       APInt Sum = c1 + c2;
5250       if (Sum.uge(OpSizeInBits))
5251         return DAG.getConstant(0, DL, VT);
5252 
5253       return DAG.getNode(
5254           ISD::SHL, DL, VT, N0.getOperand(0),
5255           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5256     }
5257   }
5258 
5259   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5260   // For this to be valid, the second form must not preserve any of the bits
5261   // that are shifted out by the inner shift in the first form.  This means
5262   // the outer shift size must be >= the number of bits added by the ext.
5263   // As a corollary, we don't care what kind of ext it is.
5264   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5265               N0.getOpcode() == ISD::ANY_EXTEND ||
5266               N0.getOpcode() == ISD::SIGN_EXTEND) &&
5267       N0.getOperand(0).getOpcode() == ISD::SHL) {
5268     SDValue N0Op0 = N0.getOperand(0);
5269     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5270       APInt c1 = N0Op0C1->getAPIntValue();
5271       APInt c2 = N1C->getAPIntValue();
5272       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5273 
5274       EVT InnerShiftVT = N0Op0.getValueType();
5275       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5276       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5277         SDLoc DL(N0);
5278         APInt Sum = c1 + c2;
5279         if (Sum.uge(OpSizeInBits))
5280           return DAG.getConstant(0, DL, VT);
5281 
5282         return DAG.getNode(
5283             ISD::SHL, DL, VT,
5284             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5285             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5286       }
5287     }
5288   }
5289 
5290   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5291   // Only fold this if the inner zext has no other uses to avoid increasing
5292   // the total number of instructions.
5293   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
5294       N0.getOperand(0).getOpcode() == ISD::SRL) {
5295     SDValue N0Op0 = N0.getOperand(0);
5296     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5297       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5298         uint64_t c1 = N0Op0C1->getZExtValue();
5299         uint64_t c2 = N1C->getZExtValue();
5300         if (c1 == c2) {
5301           SDValue NewOp0 = N0.getOperand(0);
5302           EVT CountVT = NewOp0.getOperand(1).getValueType();
5303           SDLoc DL(N);
5304           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5305                                        NewOp0,
5306                                        DAG.getConstant(c2, DL, CountVT));
5307           AddToWorklist(NewSHL.getNode());
5308           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5309         }
5310       }
5311     }
5312   }
5313 
5314   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
5315   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
5316   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5317       cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
5318     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5319       uint64_t C1 = N0C1->getZExtValue();
5320       uint64_t C2 = N1C->getZExtValue();
5321       SDLoc DL(N);
5322       if (C1 <= C2)
5323         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5324                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5325       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5326                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5327     }
5328   }
5329 
5330   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
5331   //                               (and (srl x, (sub c1, c2), MASK)
5332   // Only fold this if the inner shift has no other uses -- if it does, folding
5333   // this will increase the total number of instructions.
5334   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5335     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5336       uint64_t c1 = N0C1->getZExtValue();
5337       if (c1 < OpSizeInBits) {
5338         uint64_t c2 = N1C->getZExtValue();
5339         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5340         SDValue Shift;
5341         if (c2 > c1) {
5342           Mask = Mask.shl(c2 - c1);
5343           SDLoc DL(N);
5344           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5345                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5346         } else {
5347           Mask = Mask.lshr(c1 - c2);
5348           SDLoc DL(N);
5349           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5350                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5351         }
5352         SDLoc DL(N0);
5353         return DAG.getNode(ISD::AND, DL, VT, Shift,
5354                            DAG.getConstant(Mask, DL, VT));
5355       }
5356     }
5357   }
5358 
5359   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5360   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5361       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5362     SDLoc DL(N);
5363     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5364     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5365     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5366   }
5367 
5368   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5369   // Variant of version done on multiply, except mul by a power of 2 is turned
5370   // into a shift.
5371   if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
5372       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5373       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5374     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5375     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5376     AddToWorklist(Shl0.getNode());
5377     AddToWorklist(Shl1.getNode());
5378     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
5379   }
5380 
5381   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5382   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5383       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5384       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5385     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5386     if (isConstantOrConstantVector(Shl))
5387       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5388   }
5389 
5390   if (N1C && !N1C->isOpaque())
5391     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5392       return NewSHL;
5393 
5394   return SDValue();
5395 }
5396 
5397 SDValue DAGCombiner::visitSRA(SDNode *N) {
5398   SDValue N0 = N->getOperand(0);
5399   SDValue N1 = N->getOperand(1);
5400   EVT VT = N0.getValueType();
5401   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5402 
5403   // Arithmetic shifting an all-sign-bit value is a no-op.
5404   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5405     return N0;
5406 
5407   // fold vector ops
5408   if (VT.isVector())
5409     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5410       return FoldedVOp;
5411 
5412   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5413 
5414   // fold (sra c1, c2) -> (sra c1, c2)
5415   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5416   if (N0C && N1C && !N1C->isOpaque())
5417     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5418   // fold (sra 0, x) -> 0
5419   if (isNullConstant(N0))
5420     return N0;
5421   // fold (sra -1, x) -> -1
5422   if (isAllOnesConstant(N0))
5423     return N0;
5424   // fold (sra x, c >= size(x)) -> undef
5425   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5426     return DAG.getUNDEF(VT);
5427   // fold (sra x, 0) -> x
5428   if (N1C && N1C->isNullValue())
5429     return N0;
5430 
5431   if (SDValue NewSel = foldBinOpIntoSelect(N))
5432     return NewSel;
5433 
5434   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5435   // sext_inreg.
5436   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5437     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5438     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5439     if (VT.isVector())
5440       ExtVT = EVT::getVectorVT(*DAG.getContext(),
5441                                ExtVT, VT.getVectorNumElements());
5442     if ((!LegalOperations ||
5443          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5444       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5445                          N0.getOperand(0), DAG.getValueType(ExtVT));
5446   }
5447 
5448   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5449   if (N1C && N0.getOpcode() == ISD::SRA) {
5450     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5451       SDLoc DL(N);
5452       APInt c1 = N0C1->getAPIntValue();
5453       APInt c2 = N1C->getAPIntValue();
5454       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5455 
5456       APInt Sum = c1 + c2;
5457       if (Sum.uge(OpSizeInBits))
5458         Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
5459 
5460       return DAG.getNode(
5461           ISD::SRA, DL, VT, N0.getOperand(0),
5462           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5463     }
5464   }
5465 
5466   // fold (sra (shl X, m), (sub result_size, n))
5467   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5468   // result_size - n != m.
5469   // If truncate is free for the target sext(shl) is likely to result in better
5470   // code.
5471   if (N0.getOpcode() == ISD::SHL && N1C) {
5472     // Get the two constanst of the shifts, CN0 = m, CN = n.
5473     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5474     if (N01C) {
5475       LLVMContext &Ctx = *DAG.getContext();
5476       // Determine what the truncate's result bitsize and type would be.
5477       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5478 
5479       if (VT.isVector())
5480         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5481 
5482       // Determine the residual right-shift amount.
5483       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5484 
5485       // If the shift is not a no-op (in which case this should be just a sign
5486       // extend already), the truncated to type is legal, sign_extend is legal
5487       // on that type, and the truncate to that type is both legal and free,
5488       // perform the transform.
5489       if ((ShiftAmt > 0) &&
5490           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5491           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5492           TLI.isTruncateFree(VT, TruncVT)) {
5493 
5494         SDLoc DL(N);
5495         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5496             getShiftAmountTy(N0.getOperand(0).getValueType()));
5497         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5498                                     N0.getOperand(0), Amt);
5499         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5500                                     Shift);
5501         return DAG.getNode(ISD::SIGN_EXTEND, DL,
5502                            N->getValueType(0), Trunc);
5503       }
5504     }
5505   }
5506 
5507   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5508   if (N1.getOpcode() == ISD::TRUNCATE &&
5509       N1.getOperand(0).getOpcode() == ISD::AND) {
5510     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5511       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5512   }
5513 
5514   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5515   //      if c1 is equal to the number of bits the trunc removes
5516   if (N0.getOpcode() == ISD::TRUNCATE &&
5517       (N0.getOperand(0).getOpcode() == ISD::SRL ||
5518        N0.getOperand(0).getOpcode() == ISD::SRA) &&
5519       N0.getOperand(0).hasOneUse() &&
5520       N0.getOperand(0).getOperand(1).hasOneUse() &&
5521       N1C) {
5522     SDValue N0Op0 = N0.getOperand(0);
5523     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5524       unsigned LargeShiftVal = LargeShift->getZExtValue();
5525       EVT LargeVT = N0Op0.getValueType();
5526 
5527       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5528         SDLoc DL(N);
5529         SDValue Amt =
5530           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5531                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5532         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5533                                   N0Op0.getOperand(0), Amt);
5534         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5535       }
5536     }
5537   }
5538 
5539   // Simplify, based on bits shifted out of the LHS.
5540   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5541     return SDValue(N, 0);
5542 
5543 
5544   // If the sign bit is known to be zero, switch this to a SRL.
5545   if (DAG.SignBitIsZero(N0))
5546     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
5547 
5548   if (N1C && !N1C->isOpaque())
5549     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
5550       return NewSRA;
5551 
5552   return SDValue();
5553 }
5554 
5555 SDValue DAGCombiner::visitSRL(SDNode *N) {
5556   SDValue N0 = N->getOperand(0);
5557   SDValue N1 = N->getOperand(1);
5558   EVT VT = N0.getValueType();
5559   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5560 
5561   // fold vector ops
5562   if (VT.isVector())
5563     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5564       return FoldedVOp;
5565 
5566   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5567 
5568   // fold (srl c1, c2) -> c1 >>u c2
5569   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5570   if (N0C && N1C && !N1C->isOpaque())
5571     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5572   // fold (srl 0, x) -> 0
5573   if (isNullConstant(N0))
5574     return N0;
5575   // fold (srl x, c >= size(x)) -> undef
5576   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5577     return DAG.getUNDEF(VT);
5578   // fold (srl x, 0) -> x
5579   if (N1C && N1C->isNullValue())
5580     return N0;
5581 
5582   if (SDValue NewSel = foldBinOpIntoSelect(N))
5583     return NewSel;
5584 
5585   // if (srl x, c) is known to be zero, return 0
5586   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5587                                    APInt::getAllOnesValue(OpSizeInBits)))
5588     return DAG.getConstant(0, SDLoc(N), VT);
5589 
5590   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5591   if (N1C && N0.getOpcode() == ISD::SRL) {
5592     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5593       SDLoc DL(N);
5594       APInt c1 = N0C1->getAPIntValue();
5595       APInt c2 = N1C->getAPIntValue();
5596       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5597 
5598       APInt Sum = c1 + c2;
5599       if (Sum.uge(OpSizeInBits))
5600         return DAG.getConstant(0, DL, VT);
5601 
5602       return DAG.getNode(
5603           ISD::SRL, DL, VT, N0.getOperand(0),
5604           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5605     }
5606   }
5607 
5608   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5609   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5610       N0.getOperand(0).getOpcode() == ISD::SRL &&
5611       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
5612     uint64_t c1 =
5613       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
5614     uint64_t c2 = N1C->getZExtValue();
5615     EVT InnerShiftVT = N0.getOperand(0).getValueType();
5616     EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
5617     uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5618     // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5619     if (c1 + OpSizeInBits == InnerShiftSize) {
5620       SDLoc DL(N0);
5621       if (c1 + c2 >= InnerShiftSize)
5622         return DAG.getConstant(0, DL, VT);
5623       return DAG.getNode(ISD::TRUNCATE, DL, VT,
5624                          DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5625                                      N0.getOperand(0)->getOperand(0),
5626                                      DAG.getConstant(c1 + c2, DL,
5627                                                      ShiftCountVT)));
5628     }
5629   }
5630 
5631   // fold (srl (shl x, c), c) -> (and x, cst2)
5632   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5633       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5634     SDLoc DL(N);
5635     SDValue Mask =
5636         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5637     AddToWorklist(Mask.getNode());
5638     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5639   }
5640 
5641   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5642   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5643     // Shifting in all undef bits?
5644     EVT SmallVT = N0.getOperand(0).getValueType();
5645     unsigned BitSize = SmallVT.getScalarSizeInBits();
5646     if (N1C->getZExtValue() >= BitSize)
5647       return DAG.getUNDEF(VT);
5648 
5649     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5650       uint64_t ShiftAmt = N1C->getZExtValue();
5651       SDLoc DL0(N0);
5652       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5653                                        N0.getOperand(0),
5654                           DAG.getConstant(ShiftAmt, DL0,
5655                                           getShiftAmountTy(SmallVT)));
5656       AddToWorklist(SmallShift.getNode());
5657       APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
5658       SDLoc DL(N);
5659       return DAG.getNode(ISD::AND, DL, VT,
5660                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5661                          DAG.getConstant(Mask, DL, VT));
5662     }
5663   }
5664 
5665   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5666   // bit, which is unmodified by sra.
5667   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5668     if (N0.getOpcode() == ISD::SRA)
5669       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5670   }
5671 
5672   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5673   if (N1C && N0.getOpcode() == ISD::CTLZ &&
5674       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5675     APInt KnownZero, KnownOne;
5676     DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
5677 
5678     // If any of the input bits are KnownOne, then the input couldn't be all
5679     // zeros, thus the result of the srl will always be zero.
5680     if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5681 
5682     // If all of the bits input the to ctlz node are known to be zero, then
5683     // the result of the ctlz is "32" and the result of the shift is one.
5684     APInt UnknownBits = ~KnownZero;
5685     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5686 
5687     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5688     if ((UnknownBits & (UnknownBits - 1)) == 0) {
5689       // Okay, we know that only that the single bit specified by UnknownBits
5690       // could be set on input to the CTLZ node. If this bit is set, the SRL
5691       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5692       // to an SRL/XOR pair, which is likely to simplify more.
5693       unsigned ShAmt = UnknownBits.countTrailingZeros();
5694       SDValue Op = N0.getOperand(0);
5695 
5696       if (ShAmt) {
5697         SDLoc DL(N0);
5698         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5699                   DAG.getConstant(ShAmt, DL,
5700                                   getShiftAmountTy(Op.getValueType())));
5701         AddToWorklist(Op.getNode());
5702       }
5703 
5704       SDLoc DL(N);
5705       return DAG.getNode(ISD::XOR, DL, VT,
5706                          Op, DAG.getConstant(1, DL, VT));
5707     }
5708   }
5709 
5710   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5711   if (N1.getOpcode() == ISD::TRUNCATE &&
5712       N1.getOperand(0).getOpcode() == ISD::AND) {
5713     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5714       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5715   }
5716 
5717   // fold operands of srl based on knowledge that the low bits are not
5718   // demanded.
5719   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5720     return SDValue(N, 0);
5721 
5722   if (N1C && !N1C->isOpaque())
5723     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5724       return NewSRL;
5725 
5726   // Attempt to convert a srl of a load into a narrower zero-extending load.
5727   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5728     return NarrowLoad;
5729 
5730   // Here is a common situation. We want to optimize:
5731   //
5732   //   %a = ...
5733   //   %b = and i32 %a, 2
5734   //   %c = srl i32 %b, 1
5735   //   brcond i32 %c ...
5736   //
5737   // into
5738   //
5739   //   %a = ...
5740   //   %b = and %a, 2
5741   //   %c = setcc eq %b, 0
5742   //   brcond %c ...
5743   //
5744   // However when after the source operand of SRL is optimized into AND, the SRL
5745   // itself may not be optimized further. Look for it and add the BRCOND into
5746   // the worklist.
5747   if (N->hasOneUse()) {
5748     SDNode *Use = *N->use_begin();
5749     if (Use->getOpcode() == ISD::BRCOND)
5750       AddToWorklist(Use);
5751     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5752       // Also look pass the truncate.
5753       Use = *Use->use_begin();
5754       if (Use->getOpcode() == ISD::BRCOND)
5755         AddToWorklist(Use);
5756     }
5757   }
5758 
5759   return SDValue();
5760 }
5761 
5762 SDValue DAGCombiner::visitABS(SDNode *N) {
5763   SDValue N0 = N->getOperand(0);
5764   EVT VT = N->getValueType(0);
5765 
5766   // fold (abs c1) -> c2
5767   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5768     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
5769   // fold (abs (abs x)) -> (abs x)
5770   if (N0.getOpcode() == ISD::ABS)
5771     return N0;
5772   // fold (abs x) -> x iff not-negative
5773   if (DAG.SignBitIsZero(N0))
5774     return N0;
5775   return SDValue();
5776 }
5777 
5778 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5779   SDValue N0 = N->getOperand(0);
5780   EVT VT = N->getValueType(0);
5781 
5782   // fold (bswap c1) -> c2
5783   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5784     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5785   // fold (bswap (bswap x)) -> x
5786   if (N0.getOpcode() == ISD::BSWAP)
5787     return N0->getOperand(0);
5788   return SDValue();
5789 }
5790 
5791 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5792   SDValue N0 = N->getOperand(0);
5793   EVT VT = N->getValueType(0);
5794 
5795   // fold (bitreverse c1) -> c2
5796   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5797     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
5798   // fold (bitreverse (bitreverse x)) -> x
5799   if (N0.getOpcode() == ISD::BITREVERSE)
5800     return N0.getOperand(0);
5801   return SDValue();
5802 }
5803 
5804 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5805   SDValue N0 = N->getOperand(0);
5806   EVT VT = N->getValueType(0);
5807 
5808   // fold (ctlz c1) -> c2
5809   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5810     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5811   return SDValue();
5812 }
5813 
5814 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5815   SDValue N0 = N->getOperand(0);
5816   EVT VT = N->getValueType(0);
5817 
5818   // fold (ctlz_zero_undef c1) -> c2
5819   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5820     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5821   return SDValue();
5822 }
5823 
5824 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5825   SDValue N0 = N->getOperand(0);
5826   EVT VT = N->getValueType(0);
5827 
5828   // fold (cttz c1) -> c2
5829   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5830     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5831   return SDValue();
5832 }
5833 
5834 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5835   SDValue N0 = N->getOperand(0);
5836   EVT VT = N->getValueType(0);
5837 
5838   // fold (cttz_zero_undef c1) -> c2
5839   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5840     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5841   return SDValue();
5842 }
5843 
5844 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5845   SDValue N0 = N->getOperand(0);
5846   EVT VT = N->getValueType(0);
5847 
5848   // fold (ctpop c1) -> c2
5849   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5850     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5851   return SDValue();
5852 }
5853 
5854 
5855 /// \brief Generate Min/Max node
5856 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5857                                    SDValue RHS, SDValue True, SDValue False,
5858                                    ISD::CondCode CC, const TargetLowering &TLI,
5859                                    SelectionDAG &DAG) {
5860   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5861     return SDValue();
5862 
5863   switch (CC) {
5864   case ISD::SETOLT:
5865   case ISD::SETOLE:
5866   case ISD::SETLT:
5867   case ISD::SETLE:
5868   case ISD::SETULT:
5869   case ISD::SETULE: {
5870     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
5871     if (TLI.isOperationLegal(Opcode, VT))
5872       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5873     return SDValue();
5874   }
5875   case ISD::SETOGT:
5876   case ISD::SETOGE:
5877   case ISD::SETGT:
5878   case ISD::SETGE:
5879   case ISD::SETUGT:
5880   case ISD::SETUGE: {
5881     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5882     if (TLI.isOperationLegal(Opcode, VT))
5883       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5884     return SDValue();
5885   }
5886   default:
5887     return SDValue();
5888   }
5889 }
5890 
5891 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
5892   SDValue Cond = N->getOperand(0);
5893   SDValue N1 = N->getOperand(1);
5894   SDValue N2 = N->getOperand(2);
5895   EVT VT = N->getValueType(0);
5896   EVT CondVT = Cond.getValueType();
5897   SDLoc DL(N);
5898 
5899   if (!VT.isInteger())
5900     return SDValue();
5901 
5902   auto *C1 = dyn_cast<ConstantSDNode>(N1);
5903   auto *C2 = dyn_cast<ConstantSDNode>(N2);
5904   if (!C1 || !C2)
5905     return SDValue();
5906 
5907   // Only do this before legalization to avoid conflicting with target-specific
5908   // transforms in the other direction (create a select from a zext/sext). There
5909   // is also a target-independent combine here in DAGCombiner in the other
5910   // direction for (select Cond, -1, 0) when the condition is not i1.
5911   if (CondVT == MVT::i1 && !LegalOperations) {
5912     if (C1->isNullValue() && C2->isOne()) {
5913       // select Cond, 0, 1 --> zext (!Cond)
5914       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
5915       if (VT != MVT::i1)
5916         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
5917       return NotCond;
5918     }
5919     if (C1->isNullValue() && C2->isAllOnesValue()) {
5920       // select Cond, 0, -1 --> sext (!Cond)
5921       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
5922       if (VT != MVT::i1)
5923         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
5924       return NotCond;
5925     }
5926     if (C1->isOne() && C2->isNullValue()) {
5927       // select Cond, 1, 0 --> zext (Cond)
5928       if (VT != MVT::i1)
5929         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
5930       return Cond;
5931     }
5932     if (C1->isAllOnesValue() && C2->isNullValue()) {
5933       // select Cond, -1, 0 --> sext (Cond)
5934       if (VT != MVT::i1)
5935         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
5936       return Cond;
5937     }
5938 
5939     // For any constants that differ by 1, we can transform the select into an
5940     // extend and add. Use a target hook because some targets may prefer to
5941     // transform in the other direction.
5942     if (TLI.convertSelectOfConstantsToMath()) {
5943       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
5944         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
5945         if (VT != MVT::i1)
5946           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
5947         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
5948       }
5949       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
5950         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
5951         if (VT != MVT::i1)
5952           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
5953         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
5954       }
5955     }
5956 
5957     return SDValue();
5958   }
5959 
5960   // fold (select Cond, 0, 1) -> (xor Cond, 1)
5961   // We can't do this reliably if integer based booleans have different contents
5962   // to floating point based booleans. This is because we can't tell whether we
5963   // have an integer-based boolean or a floating-point-based boolean unless we
5964   // can find the SETCC that produced it and inspect its operands. This is
5965   // fairly easy if C is the SETCC node, but it can potentially be
5966   // undiscoverable (or not reasonably discoverable). For example, it could be
5967   // in another basic block or it could require searching a complicated
5968   // expression.
5969   if (CondVT.isInteger() &&
5970       TLI.getBooleanContents(false, true) ==
5971           TargetLowering::ZeroOrOneBooleanContent &&
5972       TLI.getBooleanContents(false, false) ==
5973           TargetLowering::ZeroOrOneBooleanContent &&
5974       C1->isNullValue() && C2->isOne()) {
5975     SDValue NotCond =
5976         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
5977     if (VT.bitsEq(CondVT))
5978       return NotCond;
5979     return DAG.getZExtOrTrunc(NotCond, DL, VT);
5980   }
5981 
5982   return SDValue();
5983 }
5984 
5985 SDValue DAGCombiner::visitSELECT(SDNode *N) {
5986   SDValue N0 = N->getOperand(0);
5987   SDValue N1 = N->getOperand(1);
5988   SDValue N2 = N->getOperand(2);
5989   EVT VT = N->getValueType(0);
5990   EVT VT0 = N0.getValueType();
5991 
5992   // fold (select C, X, X) -> X
5993   if (N1 == N2)
5994     return N1;
5995   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
5996     // fold (select true, X, Y) -> X
5997     // fold (select false, X, Y) -> Y
5998     return !N0C->isNullValue() ? N1 : N2;
5999   }
6000   // fold (select X, X, Y) -> (or X, Y)
6001   // fold (select X, 1, Y) -> (or C, Y)
6002   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
6003     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
6004 
6005   if (SDValue V = foldSelectOfConstants(N))
6006     return V;
6007 
6008   // fold (select C, 0, X) -> (and (not C), X)
6009   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
6010     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6011     AddToWorklist(NOTNode.getNode());
6012     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
6013   }
6014   // fold (select C, X, 1) -> (or (not C), X)
6015   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
6016     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6017     AddToWorklist(NOTNode.getNode());
6018     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
6019   }
6020   // fold (select X, Y, X) -> (and X, Y)
6021   // fold (select X, Y, 0) -> (and X, Y)
6022   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
6023     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
6024 
6025   // If we can fold this based on the true/false value, do so.
6026   if (SimplifySelectOps(N, N1, N2))
6027     return SDValue(N, 0);  // Don't revisit N.
6028 
6029   if (VT0 == MVT::i1) {
6030     // The code in this block deals with the following 2 equivalences:
6031     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
6032     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
6033     // The target can specify its preferred form with the
6034     // shouldNormalizeToSelectSequence() callback. However we always transform
6035     // to the right anyway if we find the inner select exists in the DAG anyway
6036     // and we always transform to the left side if we know that we can further
6037     // optimize the combination of the conditions.
6038     bool normalizeToSequence
6039       = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
6040     // select (and Cond0, Cond1), X, Y
6041     //   -> select Cond0, (select Cond1, X, Y), Y
6042     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
6043       SDValue Cond0 = N0->getOperand(0);
6044       SDValue Cond1 = N0->getOperand(1);
6045       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
6046                                         N1.getValueType(), Cond1, N1, N2);
6047       if (normalizeToSequence || !InnerSelect.use_empty())
6048         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
6049                            InnerSelect, N2);
6050     }
6051     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
6052     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
6053       SDValue Cond0 = N0->getOperand(0);
6054       SDValue Cond1 = N0->getOperand(1);
6055       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
6056                                         N1.getValueType(), Cond1, N1, N2);
6057       if (normalizeToSequence || !InnerSelect.use_empty())
6058         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
6059                            InnerSelect);
6060     }
6061 
6062     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
6063     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
6064       SDValue N1_0 = N1->getOperand(0);
6065       SDValue N1_1 = N1->getOperand(1);
6066       SDValue N1_2 = N1->getOperand(2);
6067       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
6068         // Create the actual and node if we can generate good code for it.
6069         if (!normalizeToSequence) {
6070           SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
6071                                     N0, N1_0);
6072           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
6073                              N1_1, N2);
6074         }
6075         // Otherwise see if we can optimize the "and" to a better pattern.
6076         if (SDValue Combined = visitANDLike(N0, N1_0, N))
6077           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
6078                              N1_1, N2);
6079       }
6080     }
6081     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
6082     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
6083       SDValue N2_0 = N2->getOperand(0);
6084       SDValue N2_1 = N2->getOperand(1);
6085       SDValue N2_2 = N2->getOperand(2);
6086       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
6087         // Create the actual or node if we can generate good code for it.
6088         if (!normalizeToSequence) {
6089           SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
6090                                    N0, N2_0);
6091           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
6092                              N1, N2_2);
6093         }
6094         // Otherwise see if we can optimize to a better pattern.
6095         if (SDValue Combined = visitORLike(N0, N2_0, N))
6096           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
6097                              N1, N2_2);
6098       }
6099     }
6100   }
6101 
6102   // select (xor Cond, 1), X, Y -> select Cond, Y, X
6103   if (VT0 == MVT::i1) {
6104     if (N0->getOpcode() == ISD::XOR) {
6105       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
6106         SDValue Cond0 = N0->getOperand(0);
6107         if (C->isOne())
6108           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
6109                              Cond0, N2, N1);
6110       }
6111     }
6112   }
6113 
6114   // fold selects based on a setcc into other things, such as min/max/abs
6115   if (N0.getOpcode() == ISD::SETCC) {
6116     // select x, y (fcmp lt x, y) -> fminnum x, y
6117     // select x, y (fcmp gt x, y) -> fmaxnum x, y
6118     //
6119     // This is OK if we don't care about what happens if either operand is a
6120     // NaN.
6121     //
6122 
6123     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
6124     // no signed zeros as well as no nans.
6125     const TargetOptions &Options = DAG.getTarget().Options;
6126     if (Options.UnsafeFPMath &&
6127         VT.isFloatingPoint() && N0.hasOneUse() &&
6128         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
6129       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6130 
6131       if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
6132                                                 N0.getOperand(1), N1, N2, CC,
6133                                                 TLI, DAG))
6134         return FMinMax;
6135     }
6136 
6137     if ((!LegalOperations &&
6138          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
6139         TLI.isOperationLegal(ISD::SELECT_CC, VT))
6140       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
6141                          N0.getOperand(0), N0.getOperand(1),
6142                          N1, N2, N0.getOperand(2));
6143     return SimplifySelect(SDLoc(N), N0, N1, N2);
6144   }
6145 
6146   return SDValue();
6147 }
6148 
6149 static
6150 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6151   SDLoc DL(N);
6152   EVT LoVT, HiVT;
6153   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6154 
6155   // Split the inputs.
6156   SDValue Lo, Hi, LL, LH, RL, RH;
6157   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6158   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6159 
6160   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6161   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6162 
6163   return std::make_pair(Lo, Hi);
6164 }
6165 
6166 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6167 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6168 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6169   SDLoc DL(N);
6170   SDValue Cond = N->getOperand(0);
6171   SDValue LHS = N->getOperand(1);
6172   SDValue RHS = N->getOperand(2);
6173   EVT VT = N->getValueType(0);
6174   int NumElems = VT.getVectorNumElements();
6175   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6176          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6177          Cond.getOpcode() == ISD::BUILD_VECTOR);
6178 
6179   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6180   // binary ones here.
6181   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6182     return SDValue();
6183 
6184   // We're sure we have an even number of elements due to the
6185   // concat_vectors we have as arguments to vselect.
6186   // Skip BV elements until we find one that's not an UNDEF
6187   // After we find an UNDEF element, keep looping until we get to half the
6188   // length of the BV and see if all the non-undef nodes are the same.
6189   ConstantSDNode *BottomHalf = nullptr;
6190   for (int i = 0; i < NumElems / 2; ++i) {
6191     if (Cond->getOperand(i)->isUndef())
6192       continue;
6193 
6194     if (BottomHalf == nullptr)
6195       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6196     else if (Cond->getOperand(i).getNode() != BottomHalf)
6197       return SDValue();
6198   }
6199 
6200   // Do the same for the second half of the BuildVector
6201   ConstantSDNode *TopHalf = nullptr;
6202   for (int i = NumElems / 2; i < NumElems; ++i) {
6203     if (Cond->getOperand(i)->isUndef())
6204       continue;
6205 
6206     if (TopHalf == nullptr)
6207       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6208     else if (Cond->getOperand(i).getNode() != TopHalf)
6209       return SDValue();
6210   }
6211 
6212   assert(TopHalf && BottomHalf &&
6213          "One half of the selector was all UNDEFs and the other was all the "
6214          "same value. This should have been addressed before this function.");
6215   return DAG.getNode(
6216       ISD::CONCAT_VECTORS, DL, VT,
6217       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6218       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6219 }
6220 
6221 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
6222 
6223   if (Level >= AfterLegalizeTypes)
6224     return SDValue();
6225 
6226   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
6227   SDValue Mask = MSC->getMask();
6228   SDValue Data  = MSC->getValue();
6229   SDLoc DL(N);
6230 
6231   // If the MSCATTER data type requires splitting and the mask is provided by a
6232   // SETCC, then split both nodes and its operands before legalization. This
6233   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6234   // and enables future optimizations (e.g. min/max pattern matching on X86).
6235   if (Mask.getOpcode() != ISD::SETCC)
6236     return SDValue();
6237 
6238   // Check if any splitting is required.
6239   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
6240       TargetLowering::TypeSplitVector)
6241     return SDValue();
6242   SDValue MaskLo, MaskHi, Lo, Hi;
6243   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6244 
6245   EVT LoVT, HiVT;
6246   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
6247 
6248   SDValue Chain = MSC->getChain();
6249 
6250   EVT MemoryVT = MSC->getMemoryVT();
6251   unsigned Alignment = MSC->getOriginalAlignment();
6252 
6253   EVT LoMemVT, HiMemVT;
6254   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6255 
6256   SDValue DataLo, DataHi;
6257   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6258 
6259   SDValue BasePtr = MSC->getBasePtr();
6260   SDValue IndexLo, IndexHi;
6261   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
6262 
6263   MachineMemOperand *MMO = DAG.getMachineFunction().
6264     getMachineMemOperand(MSC->getPointerInfo(),
6265                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6266                           Alignment, MSC->getAAInfo(), MSC->getRanges());
6267 
6268   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
6269   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
6270                             DL, OpsLo, MMO);
6271 
6272   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
6273   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
6274                             DL, OpsHi, MMO);
6275 
6276   AddToWorklist(Lo.getNode());
6277   AddToWorklist(Hi.getNode());
6278 
6279   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6280 }
6281 
6282 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6283 
6284   if (Level >= AfterLegalizeTypes)
6285     return SDValue();
6286 
6287   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6288   SDValue Mask = MST->getMask();
6289   SDValue Data  = MST->getValue();
6290   EVT VT = Data.getValueType();
6291   SDLoc DL(N);
6292 
6293   // If the MSTORE data type requires splitting and the mask is provided by a
6294   // SETCC, then split both nodes and its operands before legalization. This
6295   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6296   // and enables future optimizations (e.g. min/max pattern matching on X86).
6297   if (Mask.getOpcode() == ISD::SETCC) {
6298 
6299     // Check if any splitting is required.
6300     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6301         TargetLowering::TypeSplitVector)
6302       return SDValue();
6303 
6304     SDValue MaskLo, MaskHi, Lo, Hi;
6305     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6306 
6307     SDValue Chain = MST->getChain();
6308     SDValue Ptr   = MST->getBasePtr();
6309 
6310     EVT MemoryVT = MST->getMemoryVT();
6311     unsigned Alignment = MST->getOriginalAlignment();
6312 
6313     // if Alignment is equal to the vector size,
6314     // take the half of it for the second part
6315     unsigned SecondHalfAlignment =
6316       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6317 
6318     EVT LoMemVT, HiMemVT;
6319     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6320 
6321     SDValue DataLo, DataHi;
6322     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6323 
6324     MachineMemOperand *MMO = DAG.getMachineFunction().
6325       getMachineMemOperand(MST->getPointerInfo(),
6326                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6327                            Alignment, MST->getAAInfo(), MST->getRanges());
6328 
6329     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6330                             MST->isTruncatingStore(),
6331                             MST->isCompressingStore());
6332 
6333     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6334                                      MST->isCompressingStore());
6335 
6336     MMO = DAG.getMachineFunction().
6337       getMachineMemOperand(MST->getPointerInfo(),
6338                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6339                            SecondHalfAlignment, MST->getAAInfo(),
6340                            MST->getRanges());
6341 
6342     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6343                             MST->isTruncatingStore(),
6344                             MST->isCompressingStore());
6345 
6346     AddToWorklist(Lo.getNode());
6347     AddToWorklist(Hi.getNode());
6348 
6349     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6350   }
6351   return SDValue();
6352 }
6353 
6354 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6355 
6356   if (Level >= AfterLegalizeTypes)
6357     return SDValue();
6358 
6359   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6360   SDValue Mask = MGT->getMask();
6361   SDLoc DL(N);
6362 
6363   // If the MGATHER result requires splitting and the mask is provided by a
6364   // SETCC, then split both nodes and its operands before legalization. This
6365   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6366   // and enables future optimizations (e.g. min/max pattern matching on X86).
6367 
6368   if (Mask.getOpcode() != ISD::SETCC)
6369     return SDValue();
6370 
6371   EVT VT = N->getValueType(0);
6372 
6373   // Check if any splitting is required.
6374   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6375       TargetLowering::TypeSplitVector)
6376     return SDValue();
6377 
6378   SDValue MaskLo, MaskHi, Lo, Hi;
6379   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6380 
6381   SDValue Src0 = MGT->getValue();
6382   SDValue Src0Lo, Src0Hi;
6383   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6384 
6385   EVT LoVT, HiVT;
6386   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6387 
6388   SDValue Chain = MGT->getChain();
6389   EVT MemoryVT = MGT->getMemoryVT();
6390   unsigned Alignment = MGT->getOriginalAlignment();
6391 
6392   EVT LoMemVT, HiMemVT;
6393   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6394 
6395   SDValue BasePtr = MGT->getBasePtr();
6396   SDValue Index = MGT->getIndex();
6397   SDValue IndexLo, IndexHi;
6398   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6399 
6400   MachineMemOperand *MMO = DAG.getMachineFunction().
6401     getMachineMemOperand(MGT->getPointerInfo(),
6402                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6403                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6404 
6405   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6406   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6407                             MMO);
6408 
6409   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6410   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6411                             MMO);
6412 
6413   AddToWorklist(Lo.getNode());
6414   AddToWorklist(Hi.getNode());
6415 
6416   // Build a factor node to remember that this load is independent of the
6417   // other one.
6418   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6419                       Hi.getValue(1));
6420 
6421   // Legalized the chain result - switch anything that used the old chain to
6422   // use the new one.
6423   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6424 
6425   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6426 
6427   SDValue RetOps[] = { GatherRes, Chain };
6428   return DAG.getMergeValues(RetOps, DL);
6429 }
6430 
6431 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6432 
6433   if (Level >= AfterLegalizeTypes)
6434     return SDValue();
6435 
6436   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6437   SDValue Mask = MLD->getMask();
6438   SDLoc DL(N);
6439 
6440   // If the MLOAD result requires splitting and the mask is provided by a
6441   // SETCC, then split both nodes and its operands before legalization. This
6442   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6443   // and enables future optimizations (e.g. min/max pattern matching on X86).
6444 
6445   if (Mask.getOpcode() == ISD::SETCC) {
6446     EVT VT = N->getValueType(0);
6447 
6448     // Check if any splitting is required.
6449     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6450         TargetLowering::TypeSplitVector)
6451       return SDValue();
6452 
6453     SDValue MaskLo, MaskHi, Lo, Hi;
6454     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6455 
6456     SDValue Src0 = MLD->getSrc0();
6457     SDValue Src0Lo, Src0Hi;
6458     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6459 
6460     EVT LoVT, HiVT;
6461     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6462 
6463     SDValue Chain = MLD->getChain();
6464     SDValue Ptr   = MLD->getBasePtr();
6465     EVT MemoryVT = MLD->getMemoryVT();
6466     unsigned Alignment = MLD->getOriginalAlignment();
6467 
6468     // if Alignment is equal to the vector size,
6469     // take the half of it for the second part
6470     unsigned SecondHalfAlignment =
6471       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6472          Alignment/2 : Alignment;
6473 
6474     EVT LoMemVT, HiMemVT;
6475     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6476 
6477     MachineMemOperand *MMO = DAG.getMachineFunction().
6478     getMachineMemOperand(MLD->getPointerInfo(),
6479                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6480                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6481 
6482     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6483                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6484 
6485     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6486                                      MLD->isExpandingLoad());
6487 
6488     MMO = DAG.getMachineFunction().
6489     getMachineMemOperand(MLD->getPointerInfo(),
6490                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6491                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6492 
6493     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6494                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6495 
6496     AddToWorklist(Lo.getNode());
6497     AddToWorklist(Hi.getNode());
6498 
6499     // Build a factor node to remember that this load is independent of the
6500     // other one.
6501     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6502                         Hi.getValue(1));
6503 
6504     // Legalized the chain result - switch anything that used the old chain to
6505     // use the new one.
6506     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6507 
6508     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6509 
6510     SDValue RetOps[] = { LoadRes, Chain };
6511     return DAG.getMergeValues(RetOps, DL);
6512   }
6513   return SDValue();
6514 }
6515 
6516 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
6517   SDValue N0 = N->getOperand(0);
6518   SDValue N1 = N->getOperand(1);
6519   SDValue N2 = N->getOperand(2);
6520   SDLoc DL(N);
6521 
6522   // fold (vselect C, X, X) -> X
6523   if (N1 == N2)
6524     return N1;
6525 
6526   // Canonicalize integer abs.
6527   // vselect (setg[te] X,  0),  X, -X ->
6528   // vselect (setgt    X, -1),  X, -X ->
6529   // vselect (setl[te] X,  0), -X,  X ->
6530   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
6531   if (N0.getOpcode() == ISD::SETCC) {
6532     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6533     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6534     bool isAbs = false;
6535     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
6536 
6537     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
6538          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
6539         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
6540       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
6541     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
6542              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
6543       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6544 
6545     if (isAbs) {
6546       EVT VT = LHS.getValueType();
6547       SDValue Shift = DAG.getNode(
6548           ISD::SRA, DL, VT, LHS,
6549           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
6550       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
6551       AddToWorklist(Shift.getNode());
6552       AddToWorklist(Add.getNode());
6553       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
6554     }
6555   }
6556 
6557   if (SimplifySelectOps(N, N1, N2))
6558     return SDValue(N, 0);  // Don't revisit N.
6559 
6560   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
6561   if (ISD::isBuildVectorAllOnes(N0.getNode()))
6562     return N1;
6563   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
6564   if (ISD::isBuildVectorAllZeros(N0.getNode()))
6565     return N2;
6566 
6567   // The ConvertSelectToConcatVector function is assuming both the above
6568   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
6569   // and addressed.
6570   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
6571       N2.getOpcode() == ISD::CONCAT_VECTORS &&
6572       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
6573     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
6574       return CV;
6575   }
6576 
6577   return SDValue();
6578 }
6579 
6580 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
6581   SDValue N0 = N->getOperand(0);
6582   SDValue N1 = N->getOperand(1);
6583   SDValue N2 = N->getOperand(2);
6584   SDValue N3 = N->getOperand(3);
6585   SDValue N4 = N->getOperand(4);
6586   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
6587 
6588   // fold select_cc lhs, rhs, x, x, cc -> x
6589   if (N2 == N3)
6590     return N2;
6591 
6592   // Determine if the condition we're dealing with is constant
6593   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
6594                                   CC, SDLoc(N), false)) {
6595     AddToWorklist(SCC.getNode());
6596 
6597     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
6598       if (!SCCC->isNullValue())
6599         return N2;    // cond always true -> true val
6600       else
6601         return N3;    // cond always false -> false val
6602     } else if (SCC->isUndef()) {
6603       // When the condition is UNDEF, just return the first operand. This is
6604       // coherent the DAG creation, no setcc node is created in this case
6605       return N2;
6606     } else if (SCC.getOpcode() == ISD::SETCC) {
6607       // Fold to a simpler select_cc
6608       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
6609                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
6610                          SCC.getOperand(2));
6611     }
6612   }
6613 
6614   // If we can fold this based on the true/false value, do so.
6615   if (SimplifySelectOps(N, N2, N3))
6616     return SDValue(N, 0);  // Don't revisit N.
6617 
6618   // fold select_cc into other things, such as min/max/abs
6619   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
6620 }
6621 
6622 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6623   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6624                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6625                        SDLoc(N));
6626 }
6627 
6628 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6629   SDValue LHS = N->getOperand(0);
6630   SDValue RHS = N->getOperand(1);
6631   SDValue Carry = N->getOperand(2);
6632   SDValue Cond = N->getOperand(3);
6633 
6634   // If Carry is false, fold to a regular SETCC.
6635   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6636     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6637 
6638   return SDValue();
6639 }
6640 
6641 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6642 /// a build_vector of constants.
6643 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6644 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6645 /// Vector extends are not folded if operations are legal; this is to
6646 /// avoid introducing illegal build_vector dag nodes.
6647 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6648                                          SelectionDAG &DAG, bool LegalTypes,
6649                                          bool LegalOperations) {
6650   unsigned Opcode = N->getOpcode();
6651   SDValue N0 = N->getOperand(0);
6652   EVT VT = N->getValueType(0);
6653 
6654   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6655          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6656          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
6657          && "Expected EXTEND dag node in input!");
6658 
6659   // fold (sext c1) -> c1
6660   // fold (zext c1) -> c1
6661   // fold (aext c1) -> c1
6662   if (isa<ConstantSDNode>(N0))
6663     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6664 
6665   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
6666   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
6667   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
6668   EVT SVT = VT.getScalarType();
6669   if (!(VT.isVector() &&
6670       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6671       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6672     return nullptr;
6673 
6674   // We can fold this node into a build_vector.
6675   unsigned VTBits = SVT.getSizeInBits();
6676   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6677   SmallVector<SDValue, 8> Elts;
6678   unsigned NumElts = VT.getVectorNumElements();
6679   SDLoc DL(N);
6680 
6681   for (unsigned i=0; i != NumElts; ++i) {
6682     SDValue Op = N0->getOperand(i);
6683     if (Op->isUndef()) {
6684       Elts.push_back(DAG.getUNDEF(SVT));
6685       continue;
6686     }
6687 
6688     SDLoc DL(Op);
6689     // Get the constant value and if needed trunc it to the size of the type.
6690     // Nodes like build_vector might have constants wider than the scalar type.
6691     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
6692     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
6693       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
6694     else
6695       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
6696   }
6697 
6698   return DAG.getBuildVector(VT, DL, Elts).getNode();
6699 }
6700 
6701 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
6702 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
6703 // transformation. Returns true if extension are possible and the above
6704 // mentioned transformation is profitable.
6705 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
6706                                     unsigned ExtOpc,
6707                                     SmallVectorImpl<SDNode *> &ExtendNodes,
6708                                     const TargetLowering &TLI) {
6709   bool HasCopyToRegUses = false;
6710   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
6711   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
6712                             UE = N0.getNode()->use_end();
6713        UI != UE; ++UI) {
6714     SDNode *User = *UI;
6715     if (User == N)
6716       continue;
6717     if (UI.getUse().getResNo() != N0.getResNo())
6718       continue;
6719     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
6720     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
6721       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
6722       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6723         // Sign bits will be lost after a zext.
6724         return false;
6725       bool Add = false;
6726       for (unsigned i = 0; i != 2; ++i) {
6727         SDValue UseOp = User->getOperand(i);
6728         if (UseOp == N0)
6729           continue;
6730         if (!isa<ConstantSDNode>(UseOp))
6731           return false;
6732         Add = true;
6733       }
6734       if (Add)
6735         ExtendNodes.push_back(User);
6736       continue;
6737     }
6738     // If truncates aren't free and there are users we can't
6739     // extend, it isn't worthwhile.
6740     if (!isTruncFree)
6741       return false;
6742     // Remember if this value is live-out.
6743     if (User->getOpcode() == ISD::CopyToReg)
6744       HasCopyToRegUses = true;
6745   }
6746 
6747   if (HasCopyToRegUses) {
6748     bool BothLiveOut = false;
6749     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6750          UI != UE; ++UI) {
6751       SDUse &Use = UI.getUse();
6752       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6753         BothLiveOut = true;
6754         break;
6755       }
6756     }
6757     if (BothLiveOut)
6758       // Both unextended and extended values are live out. There had better be
6759       // a good reason for the transformation.
6760       return ExtendNodes.size();
6761   }
6762   return true;
6763 }
6764 
6765 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6766                                   SDValue Trunc, SDValue ExtLoad,
6767                                   const SDLoc &DL, ISD::NodeType ExtType) {
6768   // Extend SetCC uses if necessary.
6769   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6770     SDNode *SetCC = SetCCs[i];
6771     SmallVector<SDValue, 4> Ops;
6772 
6773     for (unsigned j = 0; j != 2; ++j) {
6774       SDValue SOp = SetCC->getOperand(j);
6775       if (SOp == Trunc)
6776         Ops.push_back(ExtLoad);
6777       else
6778         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6779     }
6780 
6781     Ops.push_back(SetCC->getOperand(2));
6782     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6783   }
6784 }
6785 
6786 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
6787 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
6788   SDValue N0 = N->getOperand(0);
6789   EVT DstVT = N->getValueType(0);
6790   EVT SrcVT = N0.getValueType();
6791 
6792   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
6793           N->getOpcode() == ISD::ZERO_EXTEND) &&
6794          "Unexpected node type (not an extend)!");
6795 
6796   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
6797   // For example, on a target with legal v4i32, but illegal v8i32, turn:
6798   //   (v8i32 (sext (v8i16 (load x))))
6799   // into:
6800   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
6801   //                          (v4i32 (sextload (x + 16)))))
6802   // Where uses of the original load, i.e.:
6803   //   (v8i16 (load x))
6804   // are replaced with:
6805   //   (v8i16 (truncate
6806   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
6807   //                            (v4i32 (sextload (x + 16)))))))
6808   //
6809   // This combine is only applicable to illegal, but splittable, vectors.
6810   // All legal types, and illegal non-vector types, are handled elsewhere.
6811   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
6812   //
6813   if (N0->getOpcode() != ISD::LOAD)
6814     return SDValue();
6815 
6816   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6817 
6818   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
6819       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
6820       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
6821     return SDValue();
6822 
6823   SmallVector<SDNode *, 4> SetCCs;
6824   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
6825     return SDValue();
6826 
6827   ISD::LoadExtType ExtType =
6828       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
6829 
6830   // Try to split the vector types to get down to legal types.
6831   EVT SplitSrcVT = SrcVT;
6832   EVT SplitDstVT = DstVT;
6833   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
6834          SplitSrcVT.getVectorNumElements() > 1) {
6835     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
6836     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
6837   }
6838 
6839   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
6840     return SDValue();
6841 
6842   SDLoc DL(N);
6843   const unsigned NumSplits =
6844       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
6845   const unsigned Stride = SplitSrcVT.getStoreSize();
6846   SmallVector<SDValue, 4> Loads;
6847   SmallVector<SDValue, 4> Chains;
6848 
6849   SDValue BasePtr = LN0->getBasePtr();
6850   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
6851     const unsigned Offset = Idx * Stride;
6852     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
6853 
6854     SDValue SplitLoad = DAG.getExtLoad(
6855         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
6856         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
6857         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
6858 
6859     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
6860                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
6861 
6862     Loads.push_back(SplitLoad.getValue(0));
6863     Chains.push_back(SplitLoad.getValue(1));
6864   }
6865 
6866   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
6867   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
6868 
6869   // Simplify TF.
6870   AddToWorklist(NewChain.getNode());
6871 
6872   CombineTo(N, NewValue);
6873 
6874   // Replace uses of the original load (before extension)
6875   // with a truncate of the concatenated sextloaded vectors.
6876   SDValue Trunc =
6877       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
6878   CombineTo(N0.getNode(), Trunc, NewChain);
6879   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
6880                   (ISD::NodeType)N->getOpcode());
6881   return SDValue(N, 0); // Return N so it doesn't get rechecked!
6882 }
6883 
6884 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
6885   SDValue N0 = N->getOperand(0);
6886   EVT VT = N->getValueType(0);
6887   SDLoc DL(N);
6888 
6889   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6890                                               LegalOperations))
6891     return SDValue(Res, 0);
6892 
6893   // fold (sext (sext x)) -> (sext x)
6894   // fold (sext (aext x)) -> (sext x)
6895   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6896     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
6897 
6898   if (N0.getOpcode() == ISD::TRUNCATE) {
6899     // fold (sext (truncate (load x))) -> (sext (smaller load x))
6900     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
6901     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6902       SDNode *oye = N0.getOperand(0).getNode();
6903       if (NarrowLoad.getNode() != N0.getNode()) {
6904         CombineTo(N0.getNode(), NarrowLoad);
6905         // CombineTo deleted the truncate, if needed, but not what's under it.
6906         AddToWorklist(oye);
6907       }
6908       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6909     }
6910 
6911     // See if the value being truncated is already sign extended.  If so, just
6912     // eliminate the trunc/sext pair.
6913     SDValue Op = N0.getOperand(0);
6914     unsigned OpBits   = Op.getScalarValueSizeInBits();
6915     unsigned MidBits  = N0.getScalarValueSizeInBits();
6916     unsigned DestBits = VT.getScalarSizeInBits();
6917     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
6918 
6919     if (OpBits == DestBits) {
6920       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
6921       // bits, it is already ready.
6922       if (NumSignBits > DestBits-MidBits)
6923         return Op;
6924     } else if (OpBits < DestBits) {
6925       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
6926       // bits, just sext from i32.
6927       if (NumSignBits > OpBits-MidBits)
6928         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
6929     } else {
6930       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
6931       // bits, just truncate to i32.
6932       if (NumSignBits > OpBits-MidBits)
6933         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6934     }
6935 
6936     // fold (sext (truncate x)) -> (sextinreg x).
6937     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
6938                                                  N0.getValueType())) {
6939       if (OpBits < DestBits)
6940         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
6941       else if (OpBits > DestBits)
6942         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
6943       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
6944                          DAG.getValueType(N0.getValueType()));
6945     }
6946   }
6947 
6948   // fold (sext (load x)) -> (sext (truncate (sextload x)))
6949   // Only generate vector extloads when 1) they're legal, and 2) they are
6950   // deemed desirable by the target.
6951   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6952       ((!LegalOperations && !VT.isVector() &&
6953         !cast<LoadSDNode>(N0)->isVolatile()) ||
6954        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
6955     bool DoXform = true;
6956     SmallVector<SDNode*, 4> SetCCs;
6957     if (!N0.hasOneUse())
6958       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
6959     if (VT.isVector())
6960       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6961     if (DoXform) {
6962       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6963       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
6964                                        LN0->getBasePtr(), N0.getValueType(),
6965                                        LN0->getMemOperand());
6966       CombineTo(N, ExtLoad);
6967       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6968                                   N0.getValueType(), ExtLoad);
6969       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6970       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
6971       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6972     }
6973   }
6974 
6975   // fold (sext (load x)) to multiple smaller sextloads.
6976   // Only on illegal but splittable vectors.
6977   if (SDValue ExtLoad = CombineExtLoad(N))
6978     return ExtLoad;
6979 
6980   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
6981   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
6982   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6983       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6984     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6985     EVT MemVT = LN0->getMemoryVT();
6986     if ((!LegalOperations && !LN0->isVolatile()) ||
6987         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
6988       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
6989                                        LN0->getBasePtr(), MemVT,
6990                                        LN0->getMemOperand());
6991       CombineTo(N, ExtLoad);
6992       CombineTo(N0.getNode(),
6993                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6994                             N0.getValueType(), ExtLoad),
6995                 ExtLoad.getValue(1));
6996       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6997     }
6998   }
6999 
7000   // fold (sext (and/or/xor (load x), cst)) ->
7001   //      (and/or/xor (sextload x), (sext cst))
7002   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7003        N0.getOpcode() == ISD::XOR) &&
7004       isa<LoadSDNode>(N0.getOperand(0)) &&
7005       N0.getOperand(1).getOpcode() == ISD::Constant &&
7006       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
7007       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7008     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7009     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
7010       bool DoXform = true;
7011       SmallVector<SDNode*, 4> SetCCs;
7012       if (!N0.hasOneUse())
7013         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
7014                                           SetCCs, TLI);
7015       if (DoXform) {
7016         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
7017                                          LN0->getChain(), LN0->getBasePtr(),
7018                                          LN0->getMemoryVT(),
7019                                          LN0->getMemOperand());
7020         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7021         Mask = Mask.sext(VT.getSizeInBits());
7022         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7023                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7024         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7025                                     SDLoc(N0.getOperand(0)),
7026                                     N0.getOperand(0).getValueType(), ExtLoad);
7027         CombineTo(N, And);
7028         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7029         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
7030         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7031       }
7032     }
7033   }
7034 
7035   if (N0.getOpcode() == ISD::SETCC) {
7036     SDValue N00 = N0.getOperand(0);
7037     SDValue N01 = N0.getOperand(1);
7038     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7039     EVT N00VT = N0.getOperand(0).getValueType();
7040 
7041     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
7042     // Only do this before legalize for now.
7043     if (VT.isVector() && !LegalOperations &&
7044         TLI.getBooleanContents(N00VT) ==
7045             TargetLowering::ZeroOrNegativeOneBooleanContent) {
7046       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
7047       // of the same size as the compared operands. Only optimize sext(setcc())
7048       // if this is the case.
7049       EVT SVT = getSetCCResultType(N00VT);
7050 
7051       // We know that the # elements of the results is the same as the
7052       // # elements of the compare (and the # elements of the compare result
7053       // for that matter).  Check to see that they are the same size.  If so,
7054       // we know that the element size of the sext'd result matches the
7055       // element size of the compare operands.
7056       if (VT.getSizeInBits() == SVT.getSizeInBits())
7057         return DAG.getSetCC(DL, VT, N00, N01, CC);
7058 
7059       // If the desired elements are smaller or larger than the source
7060       // elements, we can use a matching integer vector type and then
7061       // truncate/sign extend.
7062       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
7063       if (SVT == MatchingVecType) {
7064         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
7065         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
7066       }
7067     }
7068 
7069     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
7070     // Here, T can be 1 or -1, depending on the type of the setcc and
7071     // getBooleanContents().
7072     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
7073 
7074     // To determine the "true" side of the select, we need to know the high bit
7075     // of the value returned by the setcc if it evaluates to true.
7076     // If the type of the setcc is i1, then the true case of the select is just
7077     // sext(i1 1), that is, -1.
7078     // If the type of the setcc is larger (say, i8) then the value of the high
7079     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
7080     // of the appropriate width.
7081     SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
7082                                            : TLI.getConstTrueVal(DAG, VT, DL);
7083     SDValue Zero = DAG.getConstant(0, DL, VT);
7084     if (SDValue SCC =
7085             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
7086       return SCC;
7087 
7088     if (!VT.isVector()) {
7089       EVT SetCCVT = getSetCCResultType(N00VT);
7090       // Don't do this transform for i1 because there's a select transform
7091       // that would reverse it.
7092       // TODO: We should not do this transform at all without a target hook
7093       // because a sext is likely cheaper than a select?
7094       if (SetCCVT.getScalarSizeInBits() != 1 &&
7095           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
7096         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
7097         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
7098       }
7099     }
7100   }
7101 
7102   // fold (sext x) -> (zext x) if the sign bit is known zero.
7103   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
7104       DAG.SignBitIsZero(N0))
7105     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
7106 
7107   return SDValue();
7108 }
7109 
7110 // isTruncateOf - If N is a truncate of some other value, return true, record
7111 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
7112 // This function computes KnownZero to avoid a duplicated call to
7113 // computeKnownBits in the caller.
7114 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7115                          APInt &KnownZero) {
7116   APInt KnownOne;
7117   if (N->getOpcode() == ISD::TRUNCATE) {
7118     Op = N->getOperand(0);
7119     DAG.computeKnownBits(Op, KnownZero, KnownOne);
7120     return true;
7121   }
7122 
7123   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7124       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7125     return false;
7126 
7127   SDValue Op0 = N->getOperand(0);
7128   SDValue Op1 = N->getOperand(1);
7129   assert(Op0.getValueType() == Op1.getValueType());
7130 
7131   if (isNullConstant(Op0))
7132     Op = Op1;
7133   else if (isNullConstant(Op1))
7134     Op = Op0;
7135   else
7136     return false;
7137 
7138   DAG.computeKnownBits(Op, KnownZero, KnownOne);
7139 
7140   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
7141     return false;
7142 
7143   return true;
7144 }
7145 
7146 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7147   SDValue N0 = N->getOperand(0);
7148   EVT VT = N->getValueType(0);
7149 
7150   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7151                                               LegalOperations))
7152     return SDValue(Res, 0);
7153 
7154   // fold (zext (zext x)) -> (zext x)
7155   // fold (zext (aext x)) -> (zext x)
7156   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7157     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7158                        N0.getOperand(0));
7159 
7160   // fold (zext (truncate x)) -> (zext x) or
7161   //      (zext (truncate x)) -> (truncate x)
7162   // This is valid when the truncated bits of x are already zero.
7163   // FIXME: We should extend this to work for vectors too.
7164   SDValue Op;
7165   APInt KnownZero;
7166   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
7167     APInt TruncatedBits =
7168       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7169       APInt(Op.getValueSizeInBits(), 0) :
7170       APInt::getBitsSet(Op.getValueSizeInBits(),
7171                         N0.getValueSizeInBits(),
7172                         std::min(Op.getValueSizeInBits(),
7173                                  VT.getSizeInBits()));
7174     if (TruncatedBits == (KnownZero & TruncatedBits)) {
7175       if (VT.bitsGT(Op.getValueType()))
7176         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
7177       if (VT.bitsLT(Op.getValueType()))
7178         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
7179 
7180       return Op;
7181     }
7182   }
7183 
7184   // fold (zext (truncate (load x))) -> (zext (smaller load x))
7185   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
7186   if (N0.getOpcode() == ISD::TRUNCATE) {
7187     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7188       SDNode *oye = N0.getOperand(0).getNode();
7189       if (NarrowLoad.getNode() != N0.getNode()) {
7190         CombineTo(N0.getNode(), NarrowLoad);
7191         // CombineTo deleted the truncate, if needed, but not what's under it.
7192         AddToWorklist(oye);
7193       }
7194       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7195     }
7196   }
7197 
7198   // fold (zext (truncate x)) -> (and x, mask)
7199   if (N0.getOpcode() == ISD::TRUNCATE) {
7200     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7201     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7202     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7203       SDNode *oye = N0.getOperand(0).getNode();
7204       if (NarrowLoad.getNode() != N0.getNode()) {
7205         CombineTo(N0.getNode(), NarrowLoad);
7206         // CombineTo deleted the truncate, if needed, but not what's under it.
7207         AddToWorklist(oye);
7208       }
7209       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7210     }
7211 
7212     EVT SrcVT = N0.getOperand(0).getValueType();
7213     EVT MinVT = N0.getValueType();
7214 
7215     // Try to mask before the extension to avoid having to generate a larger mask,
7216     // possibly over several sub-vectors.
7217     if (SrcVT.bitsLT(VT)) {
7218       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7219                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7220         SDValue Op = N0.getOperand(0);
7221         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7222         AddToWorklist(Op.getNode());
7223         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7224       }
7225     }
7226 
7227     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7228       SDValue Op = N0.getOperand(0);
7229       if (SrcVT.bitsLT(VT)) {
7230         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
7231         AddToWorklist(Op.getNode());
7232       } else if (SrcVT.bitsGT(VT)) {
7233         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
7234         AddToWorklist(Op.getNode());
7235       }
7236       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7237     }
7238   }
7239 
7240   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7241   // if either of the casts is not free.
7242   if (N0.getOpcode() == ISD::AND &&
7243       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7244       N0.getOperand(1).getOpcode() == ISD::Constant &&
7245       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7246                            N0.getValueType()) ||
7247        !TLI.isZExtFree(N0.getValueType(), VT))) {
7248     SDValue X = N0.getOperand(0).getOperand(0);
7249     if (X.getValueType().bitsLT(VT)) {
7250       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
7251     } else if (X.getValueType().bitsGT(VT)) {
7252       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7253     }
7254     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7255     Mask = Mask.zext(VT.getSizeInBits());
7256     SDLoc DL(N);
7257     return DAG.getNode(ISD::AND, DL, VT,
7258                        X, DAG.getConstant(Mask, DL, VT));
7259   }
7260 
7261   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7262   // Only generate vector extloads when 1) they're legal, and 2) they are
7263   // deemed desirable by the target.
7264   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7265       ((!LegalOperations && !VT.isVector() &&
7266         !cast<LoadSDNode>(N0)->isVolatile()) ||
7267        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7268     bool DoXform = true;
7269     SmallVector<SDNode*, 4> SetCCs;
7270     if (!N0.hasOneUse())
7271       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7272     if (VT.isVector())
7273       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7274     if (DoXform) {
7275       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7276       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7277                                        LN0->getChain(),
7278                                        LN0->getBasePtr(), N0.getValueType(),
7279                                        LN0->getMemOperand());
7280       CombineTo(N, ExtLoad);
7281       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7282                                   N0.getValueType(), ExtLoad);
7283       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7284 
7285       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
7286                       ISD::ZERO_EXTEND);
7287       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7288     }
7289   }
7290 
7291   // fold (zext (load x)) to multiple smaller zextloads.
7292   // Only on illegal but splittable vectors.
7293   if (SDValue ExtLoad = CombineExtLoad(N))
7294     return ExtLoad;
7295 
7296   // fold (zext (and/or/xor (load x), cst)) ->
7297   //      (and/or/xor (zextload x), (zext cst))
7298   // Unless (and (load x) cst) will match as a zextload already and has
7299   // additional users.
7300   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7301        N0.getOpcode() == ISD::XOR) &&
7302       isa<LoadSDNode>(N0.getOperand(0)) &&
7303       N0.getOperand(1).getOpcode() == ISD::Constant &&
7304       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
7305       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7306     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7307     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7308       bool DoXform = true;
7309       SmallVector<SDNode*, 4> SetCCs;
7310       if (!N0.hasOneUse()) {
7311         if (N0.getOpcode() == ISD::AND) {
7312           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7313           auto NarrowLoad = false;
7314           EVT LoadResultTy = AndC->getValueType(0);
7315           EVT ExtVT, LoadedVT;
7316           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
7317                                NarrowLoad))
7318             DoXform = false;
7319         }
7320         if (DoXform)
7321           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7322                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7323       }
7324       if (DoXform) {
7325         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7326                                          LN0->getChain(), LN0->getBasePtr(),
7327                                          LN0->getMemoryVT(),
7328                                          LN0->getMemOperand());
7329         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7330         Mask = Mask.zext(VT.getSizeInBits());
7331         SDLoc DL(N);
7332         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7333                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7334         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7335                                     SDLoc(N0.getOperand(0)),
7336                                     N0.getOperand(0).getValueType(), ExtLoad);
7337         CombineTo(N, And);
7338         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7339         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
7340                         ISD::ZERO_EXTEND);
7341         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7342       }
7343     }
7344   }
7345 
7346   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7347   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7348   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7349       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7350     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7351     EVT MemVT = LN0->getMemoryVT();
7352     if ((!LegalOperations && !LN0->isVolatile()) ||
7353         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7354       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7355                                        LN0->getChain(),
7356                                        LN0->getBasePtr(), MemVT,
7357                                        LN0->getMemOperand());
7358       CombineTo(N, ExtLoad);
7359       CombineTo(N0.getNode(),
7360                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7361                             ExtLoad),
7362                 ExtLoad.getValue(1));
7363       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7364     }
7365   }
7366 
7367   if (N0.getOpcode() == ISD::SETCC) {
7368     // Only do this before legalize for now.
7369     if (!LegalOperations && VT.isVector() &&
7370         N0.getValueType().getVectorElementType() == MVT::i1) {
7371       EVT N00VT = N0.getOperand(0).getValueType();
7372       if (getSetCCResultType(N00VT) == N0.getValueType())
7373         return SDValue();
7374 
7375       // We know that the # elements of the results is the same as the #
7376       // elements of the compare (and the # elements of the compare result for
7377       // that matter). Check to see that they are the same size. If so, we know
7378       // that the element size of the sext'd result matches the element size of
7379       // the compare operands.
7380       SDLoc DL(N);
7381       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7382       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7383         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7384         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7385                                      N0.getOperand(1), N0.getOperand(2));
7386         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7387       }
7388 
7389       // If the desired elements are smaller or larger than the source
7390       // elements we can use a matching integer vector type and then
7391       // truncate/sign extend.
7392       EVT MatchingElementType = EVT::getIntegerVT(
7393           *DAG.getContext(), N00VT.getScalarSizeInBits());
7394       EVT MatchingVectorType = EVT::getVectorVT(
7395           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
7396       SDValue VsetCC =
7397           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7398                       N0.getOperand(1), N0.getOperand(2));
7399       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7400                          VecOnes);
7401     }
7402 
7403     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7404     SDLoc DL(N);
7405     if (SDValue SCC = SimplifySelectCC(
7406             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7407             DAG.getConstant(0, DL, VT),
7408             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7409       return SCC;
7410   }
7411 
7412   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
7413   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7414       isa<ConstantSDNode>(N0.getOperand(1)) &&
7415       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7416       N0.hasOneUse()) {
7417     SDValue ShAmt = N0.getOperand(1);
7418     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7419     if (N0.getOpcode() == ISD::SHL) {
7420       SDValue InnerZExt = N0.getOperand(0);
7421       // If the original shl may be shifting out bits, do not perform this
7422       // transformation.
7423       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7424         InnerZExt.getOperand(0).getValueSizeInBits();
7425       if (ShAmtVal > KnownZeroBits)
7426         return SDValue();
7427     }
7428 
7429     SDLoc DL(N);
7430 
7431     // Ensure that the shift amount is wide enough for the shifted value.
7432     if (VT.getSizeInBits() >= 256)
7433       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7434 
7435     return DAG.getNode(N0.getOpcode(), DL, VT,
7436                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7437                        ShAmt);
7438   }
7439 
7440   return SDValue();
7441 }
7442 
7443 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
7444   SDValue N0 = N->getOperand(0);
7445   EVT VT = N->getValueType(0);
7446 
7447   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7448                                               LegalOperations))
7449     return SDValue(Res, 0);
7450 
7451   // fold (aext (aext x)) -> (aext x)
7452   // fold (aext (zext x)) -> (zext x)
7453   // fold (aext (sext x)) -> (sext x)
7454   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
7455       N0.getOpcode() == ISD::ZERO_EXTEND ||
7456       N0.getOpcode() == ISD::SIGN_EXTEND)
7457     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7458 
7459   // fold (aext (truncate (load x))) -> (aext (smaller load x))
7460   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
7461   if (N0.getOpcode() == ISD::TRUNCATE) {
7462     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7463       SDNode *oye = N0.getOperand(0).getNode();
7464       if (NarrowLoad.getNode() != N0.getNode()) {
7465         CombineTo(N0.getNode(), NarrowLoad);
7466         // CombineTo deleted the truncate, if needed, but not what's under it.
7467         AddToWorklist(oye);
7468       }
7469       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7470     }
7471   }
7472 
7473   // fold (aext (truncate x))
7474   if (N0.getOpcode() == ISD::TRUNCATE) {
7475     SDValue TruncOp = N0.getOperand(0);
7476     if (TruncOp.getValueType() == VT)
7477       return TruncOp; // x iff x size == zext size.
7478     if (TruncOp.getValueType().bitsGT(VT))
7479       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
7480     return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
7481   }
7482 
7483   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
7484   // if the trunc is not free.
7485   if (N0.getOpcode() == ISD::AND &&
7486       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7487       N0.getOperand(1).getOpcode() == ISD::Constant &&
7488       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7489                           N0.getValueType())) {
7490     SDLoc DL(N);
7491     SDValue X = N0.getOperand(0).getOperand(0);
7492     if (X.getValueType().bitsLT(VT)) {
7493       X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
7494     } else if (X.getValueType().bitsGT(VT)) {
7495       X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
7496     }
7497     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7498     Mask = Mask.zext(VT.getSizeInBits());
7499     return DAG.getNode(ISD::AND, DL, VT,
7500                        X, DAG.getConstant(Mask, DL, VT));
7501   }
7502 
7503   // fold (aext (load x)) -> (aext (truncate (extload x)))
7504   // None of the supported targets knows how to perform load and any_ext
7505   // on vectors in one instruction.  We only perform this transformation on
7506   // scalars.
7507   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
7508       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7509       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
7510     bool DoXform = true;
7511     SmallVector<SDNode*, 4> SetCCs;
7512     if (!N0.hasOneUse())
7513       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
7514     if (DoXform) {
7515       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7516       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
7517                                        LN0->getChain(),
7518                                        LN0->getBasePtr(), N0.getValueType(),
7519                                        LN0->getMemOperand());
7520       CombineTo(N, ExtLoad);
7521       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7522                                   N0.getValueType(), ExtLoad);
7523       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7524       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
7525                       ISD::ANY_EXTEND);
7526       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7527     }
7528   }
7529 
7530   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
7531   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
7532   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
7533   if (N0.getOpcode() == ISD::LOAD &&
7534       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7535       N0.hasOneUse()) {
7536     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7537     ISD::LoadExtType ExtType = LN0->getExtensionType();
7538     EVT MemVT = LN0->getMemoryVT();
7539     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
7540       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
7541                                        VT, LN0->getChain(), LN0->getBasePtr(),
7542                                        MemVT, LN0->getMemOperand());
7543       CombineTo(N, ExtLoad);
7544       CombineTo(N0.getNode(),
7545                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7546                             N0.getValueType(), ExtLoad),
7547                 ExtLoad.getValue(1));
7548       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7549     }
7550   }
7551 
7552   if (N0.getOpcode() == ISD::SETCC) {
7553     // For vectors:
7554     // aext(setcc) -> vsetcc
7555     // aext(setcc) -> truncate(vsetcc)
7556     // aext(setcc) -> aext(vsetcc)
7557     // Only do this before legalize for now.
7558     if (VT.isVector() && !LegalOperations) {
7559       EVT N0VT = N0.getOperand(0).getValueType();
7560         // We know that the # elements of the results is the same as the
7561         // # elements of the compare (and the # elements of the compare result
7562         // for that matter).  Check to see that they are the same size.  If so,
7563         // we know that the element size of the sext'd result matches the
7564         // element size of the compare operands.
7565       if (VT.getSizeInBits() == N0VT.getSizeInBits())
7566         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
7567                              N0.getOperand(1),
7568                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
7569       // If the desired elements are smaller or larger than the source
7570       // elements we can use a matching integer vector type and then
7571       // truncate/any extend
7572       else {
7573         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
7574         SDValue VsetCC =
7575           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
7576                         N0.getOperand(1),
7577                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
7578         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
7579       }
7580     }
7581 
7582     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7583     SDLoc DL(N);
7584     if (SDValue SCC = SimplifySelectCC(
7585             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7586             DAG.getConstant(0, DL, VT),
7587             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7588       return SCC;
7589   }
7590 
7591   return SDValue();
7592 }
7593 
7594 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7595   SDValue N0 = N->getOperand(0);
7596   SDValue N1 = N->getOperand(1);
7597   EVT EVT = cast<VTSDNode>(N1)->getVT();
7598 
7599   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7600   if (N0.getOpcode() == ISD::AssertZext &&
7601       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
7602     return N0;
7603 
7604   return SDValue();
7605 }
7606 
7607 /// See if the specified operand can be simplified with the knowledge that only
7608 /// the bits specified by Mask are used.  If so, return the simpler operand,
7609 /// otherwise return a null SDValue.
7610 ///
7611 /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
7612 /// simplify nodes with multiple uses more aggressively.)
7613 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
7614   switch (V.getOpcode()) {
7615   default: break;
7616   case ISD::Constant: {
7617     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
7618     assert(CV && "Const value should be ConstSDNode.");
7619     const APInt &CVal = CV->getAPIntValue();
7620     APInt NewVal = CVal & Mask;
7621     if (NewVal != CVal)
7622       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
7623     break;
7624   }
7625   case ISD::OR:
7626   case ISD::XOR:
7627     // If the LHS or RHS don't contribute bits to the or, drop them.
7628     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
7629       return V.getOperand(1);
7630     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
7631       return V.getOperand(0);
7632     break;
7633   case ISD::SRL:
7634     // Only look at single-use SRLs.
7635     if (!V.getNode()->hasOneUse())
7636       break;
7637     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
7638       // See if we can recursively simplify the LHS.
7639       unsigned Amt = RHSC->getZExtValue();
7640 
7641       // Watch out for shift count overflow though.
7642       if (Amt >= Mask.getBitWidth()) break;
7643       APInt NewMask = Mask << Amt;
7644       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
7645         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
7646                            SimplifyLHS, V.getOperand(1));
7647     }
7648     break;
7649   case ISD::AND: {
7650     // X & -1 -> X (ignoring bits which aren't demanded).
7651     ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
7652     if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
7653       return V.getOperand(0);
7654     break;
7655   }
7656   }
7657   return SDValue();
7658 }
7659 
7660 /// If the result of a wider load is shifted to right of N  bits and then
7661 /// truncated to a narrower type and where N is a multiple of number of bits of
7662 /// the narrower type, transform it to a narrower load from address + N / num of
7663 /// bits of new type. If the result is to be extended, also fold the extension
7664 /// to form a extending load.
7665 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7666   unsigned Opc = N->getOpcode();
7667 
7668   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7669   SDValue N0 = N->getOperand(0);
7670   EVT VT = N->getValueType(0);
7671   EVT ExtVT = VT;
7672 
7673   // This transformation isn't valid for vector loads.
7674   if (VT.isVector())
7675     return SDValue();
7676 
7677   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
7678   // extended to VT.
7679   if (Opc == ISD::SIGN_EXTEND_INREG) {
7680     ExtType = ISD::SEXTLOAD;
7681     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7682   } else if (Opc == ISD::SRL) {
7683     // Another special-case: SRL is basically zero-extending a narrower value.
7684     ExtType = ISD::ZEXTLOAD;
7685     N0 = SDValue(N, 0);
7686     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7687     if (!N01) return SDValue();
7688     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
7689                               VT.getSizeInBits() - N01->getZExtValue());
7690   }
7691   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
7692     return SDValue();
7693 
7694   unsigned EVTBits = ExtVT.getSizeInBits();
7695 
7696   // Do not generate loads of non-round integer types since these can
7697   // be expensive (and would be wrong if the type is not byte sized).
7698   if (!ExtVT.isRound())
7699     return SDValue();
7700 
7701   unsigned ShAmt = 0;
7702   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
7703     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7704       ShAmt = N01->getZExtValue();
7705       // Is the shift amount a multiple of size of VT?
7706       if ((ShAmt & (EVTBits-1)) == 0) {
7707         N0 = N0.getOperand(0);
7708         // Is the load width a multiple of size of VT?
7709         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
7710           return SDValue();
7711       }
7712 
7713       // At this point, we must have a load or else we can't do the transform.
7714       if (!isa<LoadSDNode>(N0)) return SDValue();
7715 
7716       // Because a SRL must be assumed to *need* to zero-extend the high bits
7717       // (as opposed to anyext the high bits), we can't combine the zextload
7718       // lowering of SRL and an sextload.
7719       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
7720         return SDValue();
7721 
7722       // If the shift amount is larger than the input type then we're not
7723       // accessing any of the loaded bytes.  If the load was a zextload/extload
7724       // then the result of the shift+trunc is zero/undef (handled elsewhere).
7725       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
7726         return SDValue();
7727     }
7728   }
7729 
7730   // If the load is shifted left (and the result isn't shifted back right),
7731   // we can fold the truncate through the shift.
7732   unsigned ShLeftAmt = 0;
7733   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7734       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
7735     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7736       ShLeftAmt = N01->getZExtValue();
7737       N0 = N0.getOperand(0);
7738     }
7739   }
7740 
7741   // If we haven't found a load, we can't narrow it.  Don't transform one with
7742   // multiple uses, this would require adding a new load.
7743   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
7744     return SDValue();
7745 
7746   // Don't change the width of a volatile load.
7747   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7748   if (LN0->isVolatile())
7749     return SDValue();
7750 
7751   // Verify that we are actually reducing a load width here.
7752   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
7753     return SDValue();
7754 
7755   // For the transform to be legal, the load must produce only two values
7756   // (the value loaded and the chain).  Don't transform a pre-increment
7757   // load, for example, which produces an extra value.  Otherwise the
7758   // transformation is not equivalent, and the downstream logic to replace
7759   // uses gets things wrong.
7760   if (LN0->getNumValues() > 2)
7761     return SDValue();
7762 
7763   // If the load that we're shrinking is an extload and we're not just
7764   // discarding the extension we can't simply shrink the load. Bail.
7765   // TODO: It would be possible to merge the extensions in some cases.
7766   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
7767       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
7768     return SDValue();
7769 
7770   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
7771     return SDValue();
7772 
7773   EVT PtrType = N0.getOperand(1).getValueType();
7774 
7775   if (PtrType == MVT::Untyped || PtrType.isExtended())
7776     // It's not possible to generate a constant of extended or untyped type.
7777     return SDValue();
7778 
7779   // For big endian targets, we need to adjust the offset to the pointer to
7780   // load the correct bytes.
7781   if (DAG.getDataLayout().isBigEndian()) {
7782     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
7783     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
7784     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
7785   }
7786 
7787   uint64_t PtrOff = ShAmt / 8;
7788   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
7789   SDLoc DL(LN0);
7790   // The original load itself didn't wrap, so an offset within it doesn't.
7791   SDNodeFlags Flags;
7792   Flags.setNoUnsignedWrap(true);
7793   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
7794                                PtrType, LN0->getBasePtr(),
7795                                DAG.getConstant(PtrOff, DL, PtrType),
7796                                &Flags);
7797   AddToWorklist(NewPtr.getNode());
7798 
7799   SDValue Load;
7800   if (ExtType == ISD::NON_EXTLOAD)
7801     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
7802                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
7803                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7804   else
7805     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
7806                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
7807                           NewAlign, LN0->getMemOperand()->getFlags(),
7808                           LN0->getAAInfo());
7809 
7810   // Replace the old load's chain with the new load's chain.
7811   WorklistRemover DeadNodes(*this);
7812   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7813 
7814   // Shift the result left, if we've swallowed a left shift.
7815   SDValue Result = Load;
7816   if (ShLeftAmt != 0) {
7817     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
7818     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
7819       ShImmTy = VT;
7820     // If the shift amount is as large as the result size (but, presumably,
7821     // no larger than the source) then the useful bits of the result are
7822     // zero; we can't simply return the shortened shift, because the result
7823     // of that operation is undefined.
7824     SDLoc DL(N0);
7825     if (ShLeftAmt >= VT.getSizeInBits())
7826       Result = DAG.getConstant(0, DL, VT);
7827     else
7828       Result = DAG.getNode(ISD::SHL, DL, VT,
7829                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
7830   }
7831 
7832   // Return the new loaded value.
7833   return Result;
7834 }
7835 
7836 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
7837   SDValue N0 = N->getOperand(0);
7838   SDValue N1 = N->getOperand(1);
7839   EVT VT = N->getValueType(0);
7840   EVT EVT = cast<VTSDNode>(N1)->getVT();
7841   unsigned VTBits = VT.getScalarSizeInBits();
7842   unsigned EVTBits = EVT.getScalarSizeInBits();
7843 
7844   if (N0.isUndef())
7845     return DAG.getUNDEF(VT);
7846 
7847   // fold (sext_in_reg c1) -> c1
7848   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7849     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
7850 
7851   // If the input is already sign extended, just drop the extension.
7852   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
7853     return N0;
7854 
7855   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
7856   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
7857       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
7858     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7859                        N0.getOperand(0), N1);
7860 
7861   // fold (sext_in_reg (sext x)) -> (sext x)
7862   // fold (sext_in_reg (aext x)) -> (sext x)
7863   // if x is small enough.
7864   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
7865     SDValue N00 = N0.getOperand(0);
7866     if (N00.getScalarValueSizeInBits() <= EVTBits &&
7867         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7868       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7869   }
7870 
7871   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
7872   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
7873        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
7874        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
7875       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
7876     if (!LegalOperations ||
7877         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
7878       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
7879   }
7880 
7881   // fold (sext_in_reg (zext x)) -> (sext x)
7882   // iff we are extending the source sign bit.
7883   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
7884     SDValue N00 = N0.getOperand(0);
7885     if (N00.getScalarValueSizeInBits() == EVTBits &&
7886         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7887       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7888   }
7889 
7890   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
7891   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
7892     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
7893 
7894   // fold operands of sext_in_reg based on knowledge that the top bits are not
7895   // demanded.
7896   if (SimplifyDemandedBits(SDValue(N, 0)))
7897     return SDValue(N, 0);
7898 
7899   // fold (sext_in_reg (load x)) -> (smaller sextload x)
7900   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
7901   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7902     return NarrowLoad;
7903 
7904   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
7905   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
7906   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
7907   if (N0.getOpcode() == ISD::SRL) {
7908     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
7909       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
7910         // We can turn this into an SRA iff the input to the SRL is already sign
7911         // extended enough.
7912         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
7913         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
7914           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
7915                              N0.getOperand(0), N0.getOperand(1));
7916       }
7917   }
7918 
7919   // fold (sext_inreg (extload x)) -> (sextload x)
7920   if (ISD::isEXTLoad(N0.getNode()) &&
7921       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7922       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7923       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7924        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7925     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7926     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7927                                      LN0->getChain(),
7928                                      LN0->getBasePtr(), EVT,
7929                                      LN0->getMemOperand());
7930     CombineTo(N, ExtLoad);
7931     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7932     AddToWorklist(ExtLoad.getNode());
7933     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7934   }
7935   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
7936   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7937       N0.hasOneUse() &&
7938       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7939       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7940        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7941     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7942     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7943                                      LN0->getChain(),
7944                                      LN0->getBasePtr(), EVT,
7945                                      LN0->getMemOperand());
7946     CombineTo(N, ExtLoad);
7947     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7948     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7949   }
7950 
7951   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
7952   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
7953     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7954                                            N0.getOperand(1), false))
7955       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7956                          BSwap, N1);
7957   }
7958 
7959   return SDValue();
7960 }
7961 
7962 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
7963   SDValue N0 = N->getOperand(0);
7964   EVT VT = N->getValueType(0);
7965 
7966   if (N0.isUndef())
7967     return DAG.getUNDEF(VT);
7968 
7969   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7970                                               LegalOperations))
7971     return SDValue(Res, 0);
7972 
7973   return SDValue();
7974 }
7975 
7976 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
7977   SDValue N0 = N->getOperand(0);
7978   EVT VT = N->getValueType(0);
7979 
7980   if (N0.isUndef())
7981     return DAG.getUNDEF(VT);
7982 
7983   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7984                                               LegalOperations))
7985     return SDValue(Res, 0);
7986 
7987   return SDValue();
7988 }
7989 
7990 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
7991   SDValue N0 = N->getOperand(0);
7992   EVT VT = N->getValueType(0);
7993   bool isLE = DAG.getDataLayout().isLittleEndian();
7994 
7995   // noop truncate
7996   if (N0.getValueType() == N->getValueType(0))
7997     return N0;
7998   // fold (truncate c1) -> c1
7999   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8000     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
8001   // fold (truncate (truncate x)) -> (truncate x)
8002   if (N0.getOpcode() == ISD::TRUNCATE)
8003     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8004   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
8005   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
8006       N0.getOpcode() == ISD::SIGN_EXTEND ||
8007       N0.getOpcode() == ISD::ANY_EXTEND) {
8008     // if the source is smaller than the dest, we still need an extend.
8009     if (N0.getOperand(0).getValueType().bitsLT(VT))
8010       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8011     // if the source is larger than the dest, than we just need the truncate.
8012     if (N0.getOperand(0).getValueType().bitsGT(VT))
8013       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8014     // if the source and dest are the same type, we can drop both the extend
8015     // and the truncate.
8016     return N0.getOperand(0);
8017   }
8018 
8019   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8020   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8021     return SDValue();
8022 
8023   // Fold extract-and-trunc into a narrow extract. For example:
8024   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8025   //   i32 y = TRUNCATE(i64 x)
8026   //        -- becomes --
8027   //   v16i8 b = BITCAST (v2i64 val)
8028   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8029   //
8030   // Note: We only run this optimization after type legalization (which often
8031   // creates this pattern) and before operation legalization after which
8032   // we need to be more careful about the vector instructions that we generate.
8033   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8034       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8035 
8036     EVT VecTy = N0.getOperand(0).getValueType();
8037     EVT ExTy = N0.getValueType();
8038     EVT TrTy = N->getValueType(0);
8039 
8040     unsigned NumElem = VecTy.getVectorNumElements();
8041     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8042 
8043     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8044     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8045 
8046     SDValue EltNo = N0->getOperand(1);
8047     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
8048       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8049       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8050       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8051 
8052       SDLoc DL(N);
8053       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8054                          DAG.getBitcast(NVT, N0.getOperand(0)),
8055                          DAG.getConstant(Index, DL, IndexTy));
8056     }
8057   }
8058 
8059   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8060   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8061     EVT SrcVT = N0.getValueType();
8062     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8063         TLI.isTruncateFree(SrcVT, VT)) {
8064       SDLoc SL(N0);
8065       SDValue Cond = N0.getOperand(0);
8066       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8067       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8068       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8069     }
8070   }
8071 
8072   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8073   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8074       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8075       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8076     if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
8077       uint64_t Amt = CAmt->getZExtValue();
8078       unsigned Size = VT.getScalarSizeInBits();
8079 
8080       if (Amt < Size) {
8081         SDLoc SL(N);
8082         EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8083 
8084         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8085         return DAG.getNode(ISD::SHL, SL, VT, Trunc,
8086                            DAG.getConstant(Amt, SL, AmtVT));
8087       }
8088     }
8089   }
8090 
8091   // Fold a series of buildvector, bitcast, and truncate if possible.
8092   // For example fold
8093   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8094   //   (2xi32 (buildvector x, y)).
8095   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
8096       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
8097       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8098       N0.getOperand(0).hasOneUse()) {
8099 
8100     SDValue BuildVect = N0.getOperand(0);
8101     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8102     EVT TruncVecEltTy = VT.getVectorElementType();
8103 
8104     // Check that the element types match.
8105     if (BuildVectEltTy == TruncVecEltTy) {
8106       // Now we only need to compute the offset of the truncated elements.
8107       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
8108       unsigned TruncVecNumElts = VT.getVectorNumElements();
8109       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8110 
8111       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8112              "Invalid number of elements");
8113 
8114       SmallVector<SDValue, 8> Opnds;
8115       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
8116         Opnds.push_back(BuildVect.getOperand(i));
8117 
8118       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8119     }
8120   }
8121 
8122   // See if we can simplify the input to this truncate through knowledge that
8123   // only the low bits are being used.
8124   // For example "trunc (or (shl x, 8), y)" // -> trunc y
8125   // Currently we only perform this optimization on scalars because vectors
8126   // may have different active low bits.
8127   if (!VT.isVector()) {
8128     if (SDValue Shorter =
8129             GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
8130                                                      VT.getSizeInBits())))
8131       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8132   }
8133 
8134   // fold (truncate (load x)) -> (smaller load x)
8135   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8136   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
8137     if (SDValue Reduced = ReduceLoadWidth(N))
8138       return Reduced;
8139 
8140     // Handle the case where the load remains an extending load even
8141     // after truncation.
8142     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
8143       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8144       if (!LN0->isVolatile() &&
8145           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
8146         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8147                                          VT, LN0->getChain(), LN0->getBasePtr(),
8148                                          LN0->getMemoryVT(),
8149                                          LN0->getMemOperand());
8150         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8151         return NewLoad;
8152       }
8153     }
8154   }
8155 
8156   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8157   // where ... are all 'undef'.
8158   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
8159     SmallVector<EVT, 8> VTs;
8160     SDValue V;
8161     unsigned Idx = 0;
8162     unsigned NumDefs = 0;
8163 
8164     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
8165       SDValue X = N0.getOperand(i);
8166       if (!X.isUndef()) {
8167         V = X;
8168         Idx = i;
8169         NumDefs++;
8170       }
8171       // Stop if more than one members are non-undef.
8172       if (NumDefs > 1)
8173         break;
8174       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8175                                      VT.getVectorElementType(),
8176                                      X.getValueType().getVectorNumElements()));
8177     }
8178 
8179     if (NumDefs == 0)
8180       return DAG.getUNDEF(VT);
8181 
8182     if (NumDefs == 1) {
8183       assert(V.getNode() && "The single defined operand is empty!");
8184       SmallVector<SDValue, 8> Opnds;
8185       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
8186         if (i != Idx) {
8187           Opnds.push_back(DAG.getUNDEF(VTs[i]));
8188           continue;
8189         }
8190         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8191         AddToWorklist(NV.getNode());
8192         Opnds.push_back(NV);
8193       }
8194       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8195     }
8196   }
8197 
8198   // Fold truncate of a bitcast of a vector to an extract of the low vector
8199   // element.
8200   //
8201   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
8202   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
8203     SDValue VecSrc = N0.getOperand(0);
8204     EVT SrcVT = VecSrc.getValueType();
8205     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
8206         (!LegalOperations ||
8207          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
8208       SDLoc SL(N);
8209 
8210       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8211       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8212                          VecSrc, DAG.getConstant(0, SL, IdxVT));
8213     }
8214   }
8215 
8216   // Simplify the operands using demanded-bits information.
8217   if (!VT.isVector() &&
8218       SimplifyDemandedBits(SDValue(N, 0)))
8219     return SDValue(N, 0);
8220 
8221   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8222   // When the adde's carry is not used.
8223   if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() &&
8224       !N0.getNode()->hasAnyUseOfValue(1) &&
8225       (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) {
8226     SDLoc SL(N);
8227     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8228     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8229     return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue),
8230                        X, Y, N0.getOperand(2));
8231   }
8232 
8233   return SDValue();
8234 }
8235 
8236 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8237   SDValue Elt = N->getOperand(i);
8238   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8239     return Elt.getNode();
8240   return Elt.getOperand(Elt.getResNo()).getNode();
8241 }
8242 
8243 /// build_pair (load, load) -> load
8244 /// if load locations are consecutive.
8245 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8246   assert(N->getOpcode() == ISD::BUILD_PAIR);
8247 
8248   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8249   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8250   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8251       LD1->getAddressSpace() != LD2->getAddressSpace())
8252     return SDValue();
8253   EVT LD1VT = LD1->getValueType(0);
8254   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8255   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8256       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8257     unsigned Align = LD1->getAlignment();
8258     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8259         VT.getTypeForEVT(*DAG.getContext()));
8260 
8261     if (NewAlign <= Align &&
8262         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8263       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8264                          LD1->getPointerInfo(), Align);
8265   }
8266 
8267   return SDValue();
8268 }
8269 
8270 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8271   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8272   // and Lo parts; on big-endian machines it doesn't.
8273   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8274 }
8275 
8276 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8277                                     const TargetLowering &TLI) {
8278   // If this is not a bitcast to an FP type or if the target doesn't have
8279   // IEEE754-compliant FP logic, we're done.
8280   EVT VT = N->getValueType(0);
8281   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8282     return SDValue();
8283 
8284   // TODO: Use splat values for the constant-checking below and remove this
8285   // restriction.
8286   SDValue N0 = N->getOperand(0);
8287   EVT SourceVT = N0.getValueType();
8288   if (SourceVT.isVector())
8289     return SDValue();
8290 
8291   unsigned FPOpcode;
8292   APInt SignMask;
8293   switch (N0.getOpcode()) {
8294   case ISD::AND:
8295     FPOpcode = ISD::FABS;
8296     SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
8297     break;
8298   case ISD::XOR:
8299     FPOpcode = ISD::FNEG;
8300     SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
8301     break;
8302   // TODO: ISD::OR --> ISD::FNABS?
8303   default:
8304     return SDValue();
8305   }
8306 
8307   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8308   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8309   SDValue LogicOp0 = N0.getOperand(0);
8310   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8311   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8312       LogicOp0.getOpcode() == ISD::BITCAST &&
8313       LogicOp0->getOperand(0).getValueType() == VT)
8314     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8315 
8316   return SDValue();
8317 }
8318 
8319 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
8320   SDValue N0 = N->getOperand(0);
8321   EVT VT = N->getValueType(0);
8322 
8323   if (N0.isUndef())
8324     return DAG.getUNDEF(VT);
8325 
8326   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
8327   // Only do this before legalize, since afterward the target may be depending
8328   // on the bitconvert.
8329   // First check to see if this is all constant.
8330   if (!LegalTypes &&
8331       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
8332       VT.isVector()) {
8333     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
8334 
8335     EVT DestEltVT = N->getValueType(0).getVectorElementType();
8336     assert(!DestEltVT.isVector() &&
8337            "Element type of vector ValueType must not be vector!");
8338     if (isSimple)
8339       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
8340   }
8341 
8342   // If the input is a constant, let getNode fold it.
8343   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
8344     // If we can't allow illegal operations, we need to check that this is just
8345     // a fp -> int or int -> conversion and that the resulting operation will
8346     // be legal.
8347     if (!LegalOperations ||
8348         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
8349          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
8350         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
8351          TLI.isOperationLegal(ISD::Constant, VT)))
8352       return DAG.getBitcast(VT, N0);
8353   }
8354 
8355   // (conv (conv x, t1), t2) -> (conv x, t2)
8356   if (N0.getOpcode() == ISD::BITCAST)
8357     return DAG.getBitcast(VT, N0.getOperand(0));
8358 
8359   // fold (conv (load x)) -> (load (conv*)x)
8360   // If the resultant load doesn't need a higher alignment than the original!
8361   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8362       // Do not change the width of a volatile load.
8363       !cast<LoadSDNode>(N0)->isVolatile() &&
8364       // Do not remove the cast if the types differ in endian layout.
8365       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
8366           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
8367       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
8368       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
8369     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8370     unsigned OrigAlign = LN0->getAlignment();
8371 
8372     bool Fast = false;
8373     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8374                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
8375         Fast) {
8376       SDValue Load =
8377           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
8378                       LN0->getPointerInfo(), OrigAlign,
8379                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8380       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
8381       return Load;
8382     }
8383   }
8384 
8385   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
8386     return V;
8387 
8388   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
8389   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
8390   //
8391   // For ppc_fp128:
8392   // fold (bitcast (fneg x)) ->
8393   //     flipbit = signbit
8394   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8395   //
8396   // fold (bitcast (fabs x)) ->
8397   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
8398   //     (xor (bitcast x) (build_pair flipbit, flipbit))
8399   // This often reduces constant pool loads.
8400   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
8401        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
8402       N0.getNode()->hasOneUse() && VT.isInteger() &&
8403       !VT.isVector() && !N0.getValueType().isVector()) {
8404     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
8405     AddToWorklist(NewConv.getNode());
8406 
8407     SDLoc DL(N);
8408     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8409       assert(VT.getSizeInBits() == 128);
8410       SDValue SignBit = DAG.getConstant(
8411           APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
8412       SDValue FlipBit;
8413       if (N0.getOpcode() == ISD::FNEG) {
8414         FlipBit = SignBit;
8415         AddToWorklist(FlipBit.getNode());
8416       } else {
8417         assert(N0.getOpcode() == ISD::FABS);
8418         SDValue Hi =
8419             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
8420                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8421                                               SDLoc(NewConv)));
8422         AddToWorklist(Hi.getNode());
8423         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
8424         AddToWorklist(FlipBit.getNode());
8425       }
8426       SDValue FlipBits =
8427           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8428       AddToWorklist(FlipBits.getNode());
8429       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
8430     }
8431     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
8432     if (N0.getOpcode() == ISD::FNEG)
8433       return DAG.getNode(ISD::XOR, DL, VT,
8434                          NewConv, DAG.getConstant(SignBit, DL, VT));
8435     assert(N0.getOpcode() == ISD::FABS);
8436     return DAG.getNode(ISD::AND, DL, VT,
8437                        NewConv, DAG.getConstant(~SignBit, DL, VT));
8438   }
8439 
8440   // fold (bitconvert (fcopysign cst, x)) ->
8441   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
8442   // Note that we don't handle (copysign x, cst) because this can always be
8443   // folded to an fneg or fabs.
8444   //
8445   // For ppc_fp128:
8446   // fold (bitcast (fcopysign cst, x)) ->
8447   //     flipbit = (and (extract_element
8448   //                     (xor (bitcast cst), (bitcast x)), 0),
8449   //                    signbit)
8450   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
8451   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
8452       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
8453       VT.isInteger() && !VT.isVector()) {
8454     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
8455     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
8456     if (isTypeLegal(IntXVT)) {
8457       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
8458       AddToWorklist(X.getNode());
8459 
8460       // If X has a different width than the result/lhs, sext it or truncate it.
8461       unsigned VTWidth = VT.getSizeInBits();
8462       if (OrigXWidth < VTWidth) {
8463         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
8464         AddToWorklist(X.getNode());
8465       } else if (OrigXWidth > VTWidth) {
8466         // To get the sign bit in the right place, we have to shift it right
8467         // before truncating.
8468         SDLoc DL(X);
8469         X = DAG.getNode(ISD::SRL, DL,
8470                         X.getValueType(), X,
8471                         DAG.getConstant(OrigXWidth-VTWidth, DL,
8472                                         X.getValueType()));
8473         AddToWorklist(X.getNode());
8474         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
8475         AddToWorklist(X.getNode());
8476       }
8477 
8478       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
8479         APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
8480         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8481         AddToWorklist(Cst.getNode());
8482         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
8483         AddToWorklist(X.getNode());
8484         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
8485         AddToWorklist(XorResult.getNode());
8486         SDValue XorResult64 = DAG.getNode(
8487             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
8488             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
8489                                   SDLoc(XorResult)));
8490         AddToWorklist(XorResult64.getNode());
8491         SDValue FlipBit =
8492             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
8493                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
8494         AddToWorklist(FlipBit.getNode());
8495         SDValue FlipBits =
8496             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
8497         AddToWorklist(FlipBits.getNode());
8498         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
8499       }
8500       APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
8501       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
8502                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
8503       AddToWorklist(X.getNode());
8504 
8505       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
8506       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
8507                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
8508       AddToWorklist(Cst.getNode());
8509 
8510       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
8511     }
8512   }
8513 
8514   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
8515   if (N0.getOpcode() == ISD::BUILD_PAIR)
8516     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
8517       return CombineLD;
8518 
8519   // Remove double bitcasts from shuffles - this is often a legacy of
8520   // XformToShuffleWithZero being used to combine bitmaskings (of
8521   // float vectors bitcast to integer vectors) into shuffles.
8522   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
8523   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
8524       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
8525       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
8526       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
8527     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
8528 
8529     // If operands are a bitcast, peek through if it casts the original VT.
8530     // If operands are a constant, just bitcast back to original VT.
8531     auto PeekThroughBitcast = [&](SDValue Op) {
8532       if (Op.getOpcode() == ISD::BITCAST &&
8533           Op.getOperand(0).getValueType() == VT)
8534         return SDValue(Op.getOperand(0));
8535       if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
8536           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
8537         return DAG.getBitcast(VT, Op);
8538       return SDValue();
8539     };
8540 
8541     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
8542     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
8543     if (!(SV0 && SV1))
8544       return SDValue();
8545 
8546     int MaskScale =
8547         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
8548     SmallVector<int, 8> NewMask;
8549     for (int M : SVN->getMask())
8550       for (int i = 0; i != MaskScale; ++i)
8551         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
8552 
8553     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8554     if (!LegalMask) {
8555       std::swap(SV0, SV1);
8556       ShuffleVectorSDNode::commuteMask(NewMask);
8557       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
8558     }
8559 
8560     if (LegalMask)
8561       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
8562   }
8563 
8564   return SDValue();
8565 }
8566 
8567 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8568   EVT VT = N->getValueType(0);
8569   return CombineConsecutiveLoads(N, VT);
8570 }
8571 
8572 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
8573 /// operands. DstEltVT indicates the destination element value type.
8574 SDValue DAGCombiner::
8575 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
8576   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
8577 
8578   // If this is already the right type, we're done.
8579   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
8580 
8581   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
8582   unsigned DstBitSize = DstEltVT.getSizeInBits();
8583 
8584   // If this is a conversion of N elements of one type to N elements of another
8585   // type, convert each element.  This handles FP<->INT cases.
8586   if (SrcBitSize == DstBitSize) {
8587     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8588                               BV->getValueType(0).getVectorNumElements());
8589 
8590     // Due to the FP element handling below calling this routine recursively,
8591     // we can end up with a scalar-to-vector node here.
8592     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
8593       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
8594                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
8595 
8596     SmallVector<SDValue, 8> Ops;
8597     for (SDValue Op : BV->op_values()) {
8598       // If the vector element type is not legal, the BUILD_VECTOR operands
8599       // are promoted and implicitly truncated.  Make that explicit here.
8600       if (Op.getValueType() != SrcEltVT)
8601         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
8602       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
8603       AddToWorklist(Ops.back().getNode());
8604     }
8605     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
8606   }
8607 
8608   // Otherwise, we're growing or shrinking the elements.  To avoid having to
8609   // handle annoying details of growing/shrinking FP values, we convert them to
8610   // int first.
8611   if (SrcEltVT.isFloatingPoint()) {
8612     // Convert the input float vector to a int vector where the elements are the
8613     // same sizes.
8614     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
8615     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
8616     SrcEltVT = IntVT;
8617   }
8618 
8619   // Now we know the input is an integer vector.  If the output is a FP type,
8620   // convert to integer first, then to FP of the right size.
8621   if (DstEltVT.isFloatingPoint()) {
8622     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
8623     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
8624 
8625     // Next, convert to FP elements of the same size.
8626     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
8627   }
8628 
8629   SDLoc DL(BV);
8630 
8631   // Okay, we know the src/dst types are both integers of differing types.
8632   // Handling growing first.
8633   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
8634   if (SrcBitSize < DstBitSize) {
8635     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
8636 
8637     SmallVector<SDValue, 8> Ops;
8638     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
8639          i += NumInputsPerOutput) {
8640       bool isLE = DAG.getDataLayout().isLittleEndian();
8641       APInt NewBits = APInt(DstBitSize, 0);
8642       bool EltIsUndef = true;
8643       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
8644         // Shift the previously computed bits over.
8645         NewBits <<= SrcBitSize;
8646         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
8647         if (Op.isUndef()) continue;
8648         EltIsUndef = false;
8649 
8650         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
8651                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
8652       }
8653 
8654       if (EltIsUndef)
8655         Ops.push_back(DAG.getUNDEF(DstEltVT));
8656       else
8657         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
8658     }
8659 
8660     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
8661     return DAG.getBuildVector(VT, DL, Ops);
8662   }
8663 
8664   // Finally, this must be the case where we are shrinking elements: each input
8665   // turns into multiple outputs.
8666   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
8667   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8668                             NumOutputsPerInput*BV->getNumOperands());
8669   SmallVector<SDValue, 8> Ops;
8670 
8671   for (const SDValue &Op : BV->op_values()) {
8672     if (Op.isUndef()) {
8673       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
8674       continue;
8675     }
8676 
8677     APInt OpVal = cast<ConstantSDNode>(Op)->
8678                   getAPIntValue().zextOrTrunc(SrcBitSize);
8679 
8680     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
8681       APInt ThisVal = OpVal.trunc(DstBitSize);
8682       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
8683       OpVal = OpVal.lshr(DstBitSize);
8684     }
8685 
8686     // For big endian targets, swap the order of the pieces of each element.
8687     if (DAG.getDataLayout().isBigEndian())
8688       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
8689   }
8690 
8691   return DAG.getBuildVector(VT, DL, Ops);
8692 }
8693 
8694 /// Try to perform FMA combining on a given FADD node.
8695 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
8696   SDValue N0 = N->getOperand(0);
8697   SDValue N1 = N->getOperand(1);
8698   EVT VT = N->getValueType(0);
8699   SDLoc SL(N);
8700 
8701   const TargetOptions &Options = DAG.getTarget().Options;
8702   bool AllowFusion =
8703       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8704 
8705   // Floating-point multiply-add with intermediate rounding.
8706   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8707 
8708   // Floating-point multiply-add without intermediate rounding.
8709   bool HasFMA =
8710       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8711       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8712 
8713   // No valid opcode, do not combine.
8714   if (!HasFMAD && !HasFMA)
8715     return SDValue();
8716 
8717   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8718   ;
8719   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
8720     return SDValue();
8721 
8722   // Always prefer FMAD to FMA for precision.
8723   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8724   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8725   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8726 
8727   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
8728   // prefer to fold the multiply with fewer uses.
8729   if (Aggressive && N0.getOpcode() == ISD::FMUL &&
8730       N1.getOpcode() == ISD::FMUL) {
8731     if (N0.getNode()->use_size() > N1.getNode()->use_size())
8732       std::swap(N0, N1);
8733   }
8734 
8735   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
8736   if (N0.getOpcode() == ISD::FMUL &&
8737       (Aggressive || N0->hasOneUse())) {
8738     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8739                        N0.getOperand(0), N0.getOperand(1), N1);
8740   }
8741 
8742   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
8743   // Note: Commutes FADD operands.
8744   if (N1.getOpcode() == ISD::FMUL &&
8745       (Aggressive || N1->hasOneUse())) {
8746     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8747                        N1.getOperand(0), N1.getOperand(1), N0);
8748   }
8749 
8750   // Look through FP_EXTEND nodes to do more combining.
8751   if (AllowFusion && LookThroughFPExt) {
8752     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
8753     if (N0.getOpcode() == ISD::FP_EXTEND) {
8754       SDValue N00 = N0.getOperand(0);
8755       if (N00.getOpcode() == ISD::FMUL)
8756         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8757                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8758                                        N00.getOperand(0)),
8759                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8760                                        N00.getOperand(1)), N1);
8761     }
8762 
8763     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
8764     // Note: Commutes FADD operands.
8765     if (N1.getOpcode() == ISD::FP_EXTEND) {
8766       SDValue N10 = N1.getOperand(0);
8767       if (N10.getOpcode() == ISD::FMUL)
8768         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8769                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8770                                        N10.getOperand(0)),
8771                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8772                                        N10.getOperand(1)), N0);
8773     }
8774   }
8775 
8776   // More folding opportunities when target permits.
8777   if (Aggressive) {
8778     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
8779     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8780     // are currently only supported on binary nodes.
8781     if (Options.UnsafeFPMath &&
8782         N0.getOpcode() == PreferredFusedOpcode &&
8783         N0.getOperand(2).getOpcode() == ISD::FMUL &&
8784         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
8785       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8786                          N0.getOperand(0), N0.getOperand(1),
8787                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8788                                      N0.getOperand(2).getOperand(0),
8789                                      N0.getOperand(2).getOperand(1),
8790                                      N1));
8791     }
8792 
8793     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
8794     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8795     // are currently only supported on binary nodes.
8796     if (Options.UnsafeFPMath &&
8797         N1->getOpcode() == PreferredFusedOpcode &&
8798         N1.getOperand(2).getOpcode() == ISD::FMUL &&
8799         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
8800       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8801                          N1.getOperand(0), N1.getOperand(1),
8802                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8803                                      N1.getOperand(2).getOperand(0),
8804                                      N1.getOperand(2).getOperand(1),
8805                                      N0));
8806     }
8807 
8808     if (AllowFusion && LookThroughFPExt) {
8809       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
8810       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
8811       auto FoldFAddFMAFPExtFMul = [&] (
8812           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8813         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
8814                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8815                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8816                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8817                                        Z));
8818       };
8819       if (N0.getOpcode() == PreferredFusedOpcode) {
8820         SDValue N02 = N0.getOperand(2);
8821         if (N02.getOpcode() == ISD::FP_EXTEND) {
8822           SDValue N020 = N02.getOperand(0);
8823           if (N020.getOpcode() == ISD::FMUL)
8824             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
8825                                         N020.getOperand(0), N020.getOperand(1),
8826                                         N1);
8827         }
8828       }
8829 
8830       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
8831       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
8832       // FIXME: This turns two single-precision and one double-precision
8833       // operation into two double-precision operations, which might not be
8834       // interesting for all targets, especially GPUs.
8835       auto FoldFAddFPExtFMAFMul = [&] (
8836           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8837         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8838                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
8839                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
8840                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8841                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8842                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8843                                        Z));
8844       };
8845       if (N0.getOpcode() == ISD::FP_EXTEND) {
8846         SDValue N00 = N0.getOperand(0);
8847         if (N00.getOpcode() == PreferredFusedOpcode) {
8848           SDValue N002 = N00.getOperand(2);
8849           if (N002.getOpcode() == ISD::FMUL)
8850             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
8851                                         N002.getOperand(0), N002.getOperand(1),
8852                                         N1);
8853         }
8854       }
8855 
8856       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
8857       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
8858       if (N1.getOpcode() == PreferredFusedOpcode) {
8859         SDValue N12 = N1.getOperand(2);
8860         if (N12.getOpcode() == ISD::FP_EXTEND) {
8861           SDValue N120 = N12.getOperand(0);
8862           if (N120.getOpcode() == ISD::FMUL)
8863             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
8864                                         N120.getOperand(0), N120.getOperand(1),
8865                                         N0);
8866         }
8867       }
8868 
8869       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
8870       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
8871       // FIXME: This turns two single-precision and one double-precision
8872       // operation into two double-precision operations, which might not be
8873       // interesting for all targets, especially GPUs.
8874       if (N1.getOpcode() == ISD::FP_EXTEND) {
8875         SDValue N10 = N1.getOperand(0);
8876         if (N10.getOpcode() == PreferredFusedOpcode) {
8877           SDValue N102 = N10.getOperand(2);
8878           if (N102.getOpcode() == ISD::FMUL)
8879             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
8880                                         N102.getOperand(0), N102.getOperand(1),
8881                                         N0);
8882         }
8883       }
8884     }
8885   }
8886 
8887   return SDValue();
8888 }
8889 
8890 /// Try to perform FMA combining on a given FSUB node.
8891 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
8892   SDValue N0 = N->getOperand(0);
8893   SDValue N1 = N->getOperand(1);
8894   EVT VT = N->getValueType(0);
8895   SDLoc SL(N);
8896 
8897   const TargetOptions &Options = DAG.getTarget().Options;
8898   bool AllowFusion =
8899       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8900 
8901   // Floating-point multiply-add with intermediate rounding.
8902   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8903 
8904   // Floating-point multiply-add without intermediate rounding.
8905   bool HasFMA =
8906       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8907       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8908 
8909   // No valid opcode, do not combine.
8910   if (!HasFMAD && !HasFMA)
8911     return SDValue();
8912 
8913   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8914   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
8915     return SDValue();
8916 
8917   // Always prefer FMAD to FMA for precision.
8918   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8919   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8920   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8921 
8922   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
8923   if (N0.getOpcode() == ISD::FMUL &&
8924       (Aggressive || N0->hasOneUse())) {
8925     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8926                        N0.getOperand(0), N0.getOperand(1),
8927                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8928   }
8929 
8930   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
8931   // Note: Commutes FSUB operands.
8932   if (N1.getOpcode() == ISD::FMUL &&
8933       (Aggressive || N1->hasOneUse()))
8934     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8935                        DAG.getNode(ISD::FNEG, SL, VT,
8936                                    N1.getOperand(0)),
8937                        N1.getOperand(1), N0);
8938 
8939   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
8940   if (N0.getOpcode() == ISD::FNEG &&
8941       N0.getOperand(0).getOpcode() == ISD::FMUL &&
8942       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
8943     SDValue N00 = N0.getOperand(0).getOperand(0);
8944     SDValue N01 = N0.getOperand(0).getOperand(1);
8945     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8946                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
8947                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8948   }
8949 
8950   // Look through FP_EXTEND nodes to do more combining.
8951   if (AllowFusion && LookThroughFPExt) {
8952     // fold (fsub (fpext (fmul x, y)), z)
8953     //   -> (fma (fpext x), (fpext y), (fneg z))
8954     if (N0.getOpcode() == ISD::FP_EXTEND) {
8955       SDValue N00 = N0.getOperand(0);
8956       if (N00.getOpcode() == ISD::FMUL)
8957         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8958                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8959                                        N00.getOperand(0)),
8960                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8961                                        N00.getOperand(1)),
8962                            DAG.getNode(ISD::FNEG, SL, VT, N1));
8963     }
8964 
8965     // fold (fsub x, (fpext (fmul y, z)))
8966     //   -> (fma (fneg (fpext y)), (fpext z), x)
8967     // Note: Commutes FSUB operands.
8968     if (N1.getOpcode() == ISD::FP_EXTEND) {
8969       SDValue N10 = N1.getOperand(0);
8970       if (N10.getOpcode() == ISD::FMUL)
8971         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8972                            DAG.getNode(ISD::FNEG, SL, VT,
8973                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
8974                                                    N10.getOperand(0))),
8975                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8976                                        N10.getOperand(1)),
8977                            N0);
8978     }
8979 
8980     // fold (fsub (fpext (fneg (fmul, x, y))), z)
8981     //   -> (fneg (fma (fpext x), (fpext y), z))
8982     // Note: This could be removed with appropriate canonicalization of the
8983     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
8984     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
8985     // from implementing the canonicalization in visitFSUB.
8986     if (N0.getOpcode() == ISD::FP_EXTEND) {
8987       SDValue N00 = N0.getOperand(0);
8988       if (N00.getOpcode() == ISD::FNEG) {
8989         SDValue N000 = N00.getOperand(0);
8990         if (N000.getOpcode() == ISD::FMUL) {
8991           return DAG.getNode(ISD::FNEG, SL, VT,
8992                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8993                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8994                                                      N000.getOperand(0)),
8995                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8996                                                      N000.getOperand(1)),
8997                                          N1));
8998         }
8999       }
9000     }
9001 
9002     // fold (fsub (fneg (fpext (fmul, x, y))), z)
9003     //   -> (fneg (fma (fpext x)), (fpext y), z)
9004     // Note: This could be removed with appropriate canonicalization of the
9005     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9006     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9007     // from implementing the canonicalization in visitFSUB.
9008     if (N0.getOpcode() == ISD::FNEG) {
9009       SDValue N00 = N0.getOperand(0);
9010       if (N00.getOpcode() == ISD::FP_EXTEND) {
9011         SDValue N000 = N00.getOperand(0);
9012         if (N000.getOpcode() == ISD::FMUL) {
9013           return DAG.getNode(ISD::FNEG, SL, VT,
9014                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9015                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9016                                                      N000.getOperand(0)),
9017                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9018                                                      N000.getOperand(1)),
9019                                          N1));
9020         }
9021       }
9022     }
9023 
9024   }
9025 
9026   // More folding opportunities when target permits.
9027   if (Aggressive) {
9028     // fold (fsub (fma x, y, (fmul u, v)), z)
9029     //   -> (fma x, y (fma u, v, (fneg z)))
9030     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9031     // are currently only supported on binary nodes.
9032     if (Options.UnsafeFPMath &&
9033         N0.getOpcode() == PreferredFusedOpcode &&
9034         N0.getOperand(2).getOpcode() == ISD::FMUL &&
9035         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
9036       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9037                          N0.getOperand(0), N0.getOperand(1),
9038                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9039                                      N0.getOperand(2).getOperand(0),
9040                                      N0.getOperand(2).getOperand(1),
9041                                      DAG.getNode(ISD::FNEG, SL, VT,
9042                                                  N1)));
9043     }
9044 
9045     // fold (fsub x, (fma y, z, (fmul u, v)))
9046     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9047     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9048     // are currently only supported on binary nodes.
9049     if (Options.UnsafeFPMath &&
9050         N1.getOpcode() == PreferredFusedOpcode &&
9051         N1.getOperand(2).getOpcode() == ISD::FMUL) {
9052       SDValue N20 = N1.getOperand(2).getOperand(0);
9053       SDValue N21 = N1.getOperand(2).getOperand(1);
9054       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9055                          DAG.getNode(ISD::FNEG, SL, VT,
9056                                      N1.getOperand(0)),
9057                          N1.getOperand(1),
9058                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9059                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9060 
9061                                      N21, N0));
9062     }
9063 
9064     if (AllowFusion && LookThroughFPExt) {
9065       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9066       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9067       if (N0.getOpcode() == PreferredFusedOpcode) {
9068         SDValue N02 = N0.getOperand(2);
9069         if (N02.getOpcode() == ISD::FP_EXTEND) {
9070           SDValue N020 = N02.getOperand(0);
9071           if (N020.getOpcode() == ISD::FMUL)
9072             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9073                                N0.getOperand(0), N0.getOperand(1),
9074                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9075                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9076                                                        N020.getOperand(0)),
9077                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9078                                                        N020.getOperand(1)),
9079                                            DAG.getNode(ISD::FNEG, SL, VT,
9080                                                        N1)));
9081         }
9082       }
9083 
9084       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9085       //   -> (fma (fpext x), (fpext y),
9086       //           (fma (fpext u), (fpext v), (fneg z)))
9087       // FIXME: This turns two single-precision and one double-precision
9088       // operation into two double-precision operations, which might not be
9089       // interesting for all targets, especially GPUs.
9090       if (N0.getOpcode() == ISD::FP_EXTEND) {
9091         SDValue N00 = N0.getOperand(0);
9092         if (N00.getOpcode() == PreferredFusedOpcode) {
9093           SDValue N002 = N00.getOperand(2);
9094           if (N002.getOpcode() == ISD::FMUL)
9095             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9096                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9097                                            N00.getOperand(0)),
9098                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9099                                            N00.getOperand(1)),
9100                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9101                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9102                                                        N002.getOperand(0)),
9103                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9104                                                        N002.getOperand(1)),
9105                                            DAG.getNode(ISD::FNEG, SL, VT,
9106                                                        N1)));
9107         }
9108       }
9109 
9110       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9111       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9112       if (N1.getOpcode() == PreferredFusedOpcode &&
9113         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9114         SDValue N120 = N1.getOperand(2).getOperand(0);
9115         if (N120.getOpcode() == ISD::FMUL) {
9116           SDValue N1200 = N120.getOperand(0);
9117           SDValue N1201 = N120.getOperand(1);
9118           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9119                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9120                              N1.getOperand(1),
9121                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9122                                          DAG.getNode(ISD::FNEG, SL, VT,
9123                                              DAG.getNode(ISD::FP_EXTEND, SL,
9124                                                          VT, N1200)),
9125                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9126                                                      N1201),
9127                                          N0));
9128         }
9129       }
9130 
9131       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9132       //   -> (fma (fneg (fpext y)), (fpext z),
9133       //           (fma (fneg (fpext u)), (fpext v), x))
9134       // FIXME: This turns two single-precision and one double-precision
9135       // operation into two double-precision operations, which might not be
9136       // interesting for all targets, especially GPUs.
9137       if (N1.getOpcode() == ISD::FP_EXTEND &&
9138         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9139         SDValue N100 = N1.getOperand(0).getOperand(0);
9140         SDValue N101 = N1.getOperand(0).getOperand(1);
9141         SDValue N102 = N1.getOperand(0).getOperand(2);
9142         if (N102.getOpcode() == ISD::FMUL) {
9143           SDValue N1020 = N102.getOperand(0);
9144           SDValue N1021 = N102.getOperand(1);
9145           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9146                              DAG.getNode(ISD::FNEG, SL, VT,
9147                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9148                                                      N100)),
9149                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9150                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9151                                          DAG.getNode(ISD::FNEG, SL, VT,
9152                                              DAG.getNode(ISD::FP_EXTEND, SL,
9153                                                          VT, N1020)),
9154                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9155                                                      N1021),
9156                                          N0));
9157         }
9158       }
9159     }
9160   }
9161 
9162   return SDValue();
9163 }
9164 
9165 /// Try to perform FMA combining on a given FMUL node based on the distributive
9166 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9167 /// subtraction instead of addition).
9168 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9169   SDValue N0 = N->getOperand(0);
9170   SDValue N1 = N->getOperand(1);
9171   EVT VT = N->getValueType(0);
9172   SDLoc SL(N);
9173 
9174   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9175 
9176   const TargetOptions &Options = DAG.getTarget().Options;
9177 
9178   // The transforms below are incorrect when x == 0 and y == inf, because the
9179   // intermediate multiplication produces a nan.
9180   if (!Options.NoInfsFPMath)
9181     return SDValue();
9182 
9183   // Floating-point multiply-add without intermediate rounding.
9184   bool HasFMA =
9185       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9186       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9187       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9188 
9189   // Floating-point multiply-add with intermediate rounding. This can result
9190   // in a less precise result due to the changed rounding order.
9191   bool HasFMAD = Options.UnsafeFPMath &&
9192                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9193 
9194   // No valid opcode, do not combine.
9195   if (!HasFMAD && !HasFMA)
9196     return SDValue();
9197 
9198   // Always prefer FMAD to FMA for precision.
9199   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9200   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9201 
9202   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9203   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9204   auto FuseFADD = [&](SDValue X, SDValue Y) {
9205     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9206       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9207       if (XC1 && XC1->isExactlyValue(+1.0))
9208         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9209       if (XC1 && XC1->isExactlyValue(-1.0))
9210         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9211                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9212     }
9213     return SDValue();
9214   };
9215 
9216   if (SDValue FMA = FuseFADD(N0, N1))
9217     return FMA;
9218   if (SDValue FMA = FuseFADD(N1, N0))
9219     return FMA;
9220 
9221   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9222   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9223   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9224   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9225   auto FuseFSUB = [&](SDValue X, SDValue Y) {
9226     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9227       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9228       if (XC0 && XC0->isExactlyValue(+1.0))
9229         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9230                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9231                            Y);
9232       if (XC0 && XC0->isExactlyValue(-1.0))
9233         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9234                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9235                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9236 
9237       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9238       if (XC1 && XC1->isExactlyValue(+1.0))
9239         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9240                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9241       if (XC1 && XC1->isExactlyValue(-1.0))
9242         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9243     }
9244     return SDValue();
9245   };
9246 
9247   if (SDValue FMA = FuseFSUB(N0, N1))
9248     return FMA;
9249   if (SDValue FMA = FuseFSUB(N1, N0))
9250     return FMA;
9251 
9252   return SDValue();
9253 }
9254 
9255 SDValue DAGCombiner::visitFADD(SDNode *N) {
9256   SDValue N0 = N->getOperand(0);
9257   SDValue N1 = N->getOperand(1);
9258   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9259   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9260   EVT VT = N->getValueType(0);
9261   SDLoc DL(N);
9262   const TargetOptions &Options = DAG.getTarget().Options;
9263   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9264 
9265   // fold vector ops
9266   if (VT.isVector())
9267     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9268       return FoldedVOp;
9269 
9270   // fold (fadd c1, c2) -> c1 + c2
9271   if (N0CFP && N1CFP)
9272     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9273 
9274   // canonicalize constant to RHS
9275   if (N0CFP && !N1CFP)
9276     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9277 
9278   if (SDValue NewSel = foldBinOpIntoSelect(N))
9279     return NewSel;
9280 
9281   // fold (fadd A, (fneg B)) -> (fsub A, B)
9282   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9283       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9284     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9285                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9286 
9287   // fold (fadd (fneg A), B) -> (fsub B, A)
9288   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9289       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9290     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9291                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9292 
9293   // FIXME: Auto-upgrade the target/function-level option.
9294   if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
9295     // fold (fadd A, 0) -> A
9296     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9297       if (N1C->isZero())
9298         return N0;
9299   }
9300 
9301   // If 'unsafe math' is enabled, fold lots of things.
9302   if (Options.UnsafeFPMath) {
9303     // No FP constant should be created after legalization as Instruction
9304     // Selection pass has a hard time dealing with FP constants.
9305     bool AllowNewConst = (Level < AfterLegalizeDAG);
9306 
9307     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9308     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9309         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9310       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9311                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9312                                      Flags),
9313                          Flags);
9314 
9315     // If allowed, fold (fadd (fneg x), x) -> 0.0
9316     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9317       return DAG.getConstantFP(0.0, DL, VT);
9318 
9319     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9320     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9321       return DAG.getConstantFP(0.0, DL, VT);
9322 
9323     // We can fold chains of FADD's of the same value into multiplications.
9324     // This transform is not safe in general because we are reducing the number
9325     // of rounding steps.
9326     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9327       if (N0.getOpcode() == ISD::FMUL) {
9328         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9329         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9330 
9331         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9332         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9333           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9334                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9335           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9336         }
9337 
9338         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9339         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9340             N1.getOperand(0) == N1.getOperand(1) &&
9341             N0.getOperand(0) == N1.getOperand(0)) {
9342           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9343                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9344           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9345         }
9346       }
9347 
9348       if (N1.getOpcode() == ISD::FMUL) {
9349         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9350         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9351 
9352         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9353         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9354           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9355                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9356           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9357         }
9358 
9359         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9360         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9361             N0.getOperand(0) == N0.getOperand(1) &&
9362             N1.getOperand(0) == N0.getOperand(0)) {
9363           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9364                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9365           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9366         }
9367       }
9368 
9369       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9370         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9371         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9372         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9373             (N0.getOperand(0) == N1)) {
9374           return DAG.getNode(ISD::FMUL, DL, VT,
9375                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9376         }
9377       }
9378 
9379       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9380         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9381         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9382         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9383             N1.getOperand(0) == N0) {
9384           return DAG.getNode(ISD::FMUL, DL, VT,
9385                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9386         }
9387       }
9388 
9389       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9390       if (AllowNewConst &&
9391           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9392           N0.getOperand(0) == N0.getOperand(1) &&
9393           N1.getOperand(0) == N1.getOperand(1) &&
9394           N0.getOperand(0) == N1.getOperand(0)) {
9395         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9396                            DAG.getConstantFP(4.0, DL, VT), Flags);
9397       }
9398     }
9399   } // enable-unsafe-fp-math
9400 
9401   // FADD -> FMA combines:
9402   if (SDValue Fused = visitFADDForFMACombine(N)) {
9403     AddToWorklist(Fused.getNode());
9404     return Fused;
9405   }
9406   return SDValue();
9407 }
9408 
9409 SDValue DAGCombiner::visitFSUB(SDNode *N) {
9410   SDValue N0 = N->getOperand(0);
9411   SDValue N1 = N->getOperand(1);
9412   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9413   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9414   EVT VT = N->getValueType(0);
9415   SDLoc DL(N);
9416   const TargetOptions &Options = DAG.getTarget().Options;
9417   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9418 
9419   // fold vector ops
9420   if (VT.isVector())
9421     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9422       return FoldedVOp;
9423 
9424   // fold (fsub c1, c2) -> c1-c2
9425   if (N0CFP && N1CFP)
9426     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
9427 
9428   if (SDValue NewSel = foldBinOpIntoSelect(N))
9429     return NewSel;
9430 
9431   // fold (fsub A, (fneg B)) -> (fadd A, B)
9432   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9433     return DAG.getNode(ISD::FADD, DL, VT, N0,
9434                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9435 
9436   // FIXME: Auto-upgrade the target/function-level option.
9437   if (Options.NoSignedZerosFPMath  || N->getFlags()->hasNoSignedZeros()) {
9438     // (fsub 0, B) -> -B
9439     if (N0CFP && N0CFP->isZero()) {
9440       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9441         return GetNegatedExpression(N1, DAG, LegalOperations);
9442       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9443         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
9444     }
9445   }
9446 
9447   // If 'unsafe math' is enabled, fold lots of things.
9448   if (Options.UnsafeFPMath) {
9449     // (fsub A, 0) -> A
9450     if (N1CFP && N1CFP->isZero())
9451       return N0;
9452 
9453     // (fsub x, x) -> 0.0
9454     if (N0 == N1)
9455       return DAG.getConstantFP(0.0f, DL, VT);
9456 
9457     // (fsub x, (fadd x, y)) -> (fneg y)
9458     // (fsub x, (fadd y, x)) -> (fneg y)
9459     if (N1.getOpcode() == ISD::FADD) {
9460       SDValue N10 = N1->getOperand(0);
9461       SDValue N11 = N1->getOperand(1);
9462 
9463       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
9464         return GetNegatedExpression(N11, DAG, LegalOperations);
9465 
9466       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
9467         return GetNegatedExpression(N10, DAG, LegalOperations);
9468     }
9469   }
9470 
9471   // FSUB -> FMA combines:
9472   if (SDValue Fused = visitFSUBForFMACombine(N)) {
9473     AddToWorklist(Fused.getNode());
9474     return Fused;
9475   }
9476 
9477   return SDValue();
9478 }
9479 
9480 SDValue DAGCombiner::visitFMUL(SDNode *N) {
9481   SDValue N0 = N->getOperand(0);
9482   SDValue N1 = N->getOperand(1);
9483   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9484   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9485   EVT VT = N->getValueType(0);
9486   SDLoc DL(N);
9487   const TargetOptions &Options = DAG.getTarget().Options;
9488   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9489 
9490   // fold vector ops
9491   if (VT.isVector()) {
9492     // This just handles C1 * C2 for vectors. Other vector folds are below.
9493     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9494       return FoldedVOp;
9495   }
9496 
9497   // fold (fmul c1, c2) -> c1*c2
9498   if (N0CFP && N1CFP)
9499     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
9500 
9501   // canonicalize constant to RHS
9502   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9503      !isConstantFPBuildVectorOrConstantFP(N1))
9504     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
9505 
9506   // fold (fmul A, 1.0) -> A
9507   if (N1CFP && N1CFP->isExactlyValue(1.0))
9508     return N0;
9509 
9510   if (SDValue NewSel = foldBinOpIntoSelect(N))
9511     return NewSel;
9512 
9513   if (Options.UnsafeFPMath) {
9514     // fold (fmul A, 0) -> 0
9515     if (N1CFP && N1CFP->isZero())
9516       return N1;
9517 
9518     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
9519     if (N0.getOpcode() == ISD::FMUL) {
9520       // Fold scalars or any vector constants (not just splats).
9521       // This fold is done in general by InstCombine, but extra fmul insts
9522       // may have been generated during lowering.
9523       SDValue N00 = N0.getOperand(0);
9524       SDValue N01 = N0.getOperand(1);
9525       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
9526       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
9527       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
9528 
9529       // Check 1: Make sure that the first operand of the inner multiply is NOT
9530       // a constant. Otherwise, we may induce infinite looping.
9531       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
9532         // Check 2: Make sure that the second operand of the inner multiply and
9533         // the second operand of the outer multiply are constants.
9534         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
9535             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
9536           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
9537           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
9538         }
9539       }
9540     }
9541 
9542     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
9543     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
9544     // during an early run of DAGCombiner can prevent folding with fmuls
9545     // inserted during lowering.
9546     if (N0.getOpcode() == ISD::FADD &&
9547         (N0.getOperand(0) == N0.getOperand(1)) &&
9548         N0.hasOneUse()) {
9549       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
9550       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
9551       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
9552     }
9553   }
9554 
9555   // fold (fmul X, 2.0) -> (fadd X, X)
9556   if (N1CFP && N1CFP->isExactlyValue(+2.0))
9557     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
9558 
9559   // fold (fmul X, -1.0) -> (fneg X)
9560   if (N1CFP && N1CFP->isExactlyValue(-1.0))
9561     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9562       return DAG.getNode(ISD::FNEG, DL, VT, N0);
9563 
9564   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
9565   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9566     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9567       // Both can be negated for free, check to see if at least one is cheaper
9568       // negated.
9569       if (LHSNeg == 2 || RHSNeg == 2)
9570         return DAG.getNode(ISD::FMUL, DL, VT,
9571                            GetNegatedExpression(N0, DAG, LegalOperations),
9572                            GetNegatedExpression(N1, DAG, LegalOperations),
9573                            Flags);
9574     }
9575   }
9576 
9577   // FMUL -> FMA combines:
9578   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
9579     AddToWorklist(Fused.getNode());
9580     return Fused;
9581   }
9582 
9583   return SDValue();
9584 }
9585 
9586 SDValue DAGCombiner::visitFMA(SDNode *N) {
9587   SDValue N0 = N->getOperand(0);
9588   SDValue N1 = N->getOperand(1);
9589   SDValue N2 = N->getOperand(2);
9590   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9591   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9592   EVT VT = N->getValueType(0);
9593   SDLoc DL(N);
9594   const TargetOptions &Options = DAG.getTarget().Options;
9595 
9596   // Constant fold FMA.
9597   if (isa<ConstantFPSDNode>(N0) &&
9598       isa<ConstantFPSDNode>(N1) &&
9599       isa<ConstantFPSDNode>(N2)) {
9600     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
9601   }
9602 
9603   if (Options.UnsafeFPMath) {
9604     if (N0CFP && N0CFP->isZero())
9605       return N2;
9606     if (N1CFP && N1CFP->isZero())
9607       return N2;
9608   }
9609   // TODO: The FMA node should have flags that propagate to these nodes.
9610   if (N0CFP && N0CFP->isExactlyValue(1.0))
9611     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
9612   if (N1CFP && N1CFP->isExactlyValue(1.0))
9613     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
9614 
9615   // Canonicalize (fma c, x, y) -> (fma x, c, y)
9616   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9617      !isConstantFPBuildVectorOrConstantFP(N1))
9618     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
9619 
9620   // TODO: FMA nodes should have flags that propagate to the created nodes.
9621   // For now, create a Flags object for use with all unsafe math transforms.
9622   SDNodeFlags Flags;
9623   Flags.setUnsafeAlgebra(true);
9624 
9625   if (Options.UnsafeFPMath) {
9626     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
9627     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
9628         isConstantFPBuildVectorOrConstantFP(N1) &&
9629         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
9630       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9631                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
9632                                      &Flags), &Flags);
9633     }
9634 
9635     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
9636     if (N0.getOpcode() == ISD::FMUL &&
9637         isConstantFPBuildVectorOrConstantFP(N1) &&
9638         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
9639       return DAG.getNode(ISD::FMA, DL, VT,
9640                          N0.getOperand(0),
9641                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
9642                                      &Flags),
9643                          N2);
9644     }
9645   }
9646 
9647   // (fma x, 1, y) -> (fadd x, y)
9648   // (fma x, -1, y) -> (fadd (fneg x), y)
9649   if (N1CFP) {
9650     if (N1CFP->isExactlyValue(1.0))
9651       // TODO: The FMA node should have flags that propagate to this node.
9652       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
9653 
9654     if (N1CFP->isExactlyValue(-1.0) &&
9655         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
9656       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
9657       AddToWorklist(RHSNeg.getNode());
9658       // TODO: The FMA node should have flags that propagate to this node.
9659       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
9660     }
9661   }
9662 
9663   if (Options.UnsafeFPMath) {
9664     // (fma x, c, x) -> (fmul x, (c+1))
9665     if (N1CFP && N0 == N2) {
9666       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9667                          DAG.getNode(ISD::FADD, DL, VT, N1,
9668                                      DAG.getConstantFP(1.0, DL, VT), &Flags),
9669                          &Flags);
9670     }
9671 
9672     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
9673     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
9674       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9675                          DAG.getNode(ISD::FADD, DL, VT, N1,
9676                                      DAG.getConstantFP(-1.0, DL, VT), &Flags),
9677                          &Flags);
9678     }
9679   }
9680 
9681   return SDValue();
9682 }
9683 
9684 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9685 // reciprocal.
9686 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
9687 // Notice that this is not always beneficial. One reason is different targets
9688 // may have different costs for FDIV and FMUL, so sometimes the cost of two
9689 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
9690 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
9691 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
9692   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
9693   const SDNodeFlags *Flags = N->getFlags();
9694   if (!UnsafeMath && !Flags->hasAllowReciprocal())
9695     return SDValue();
9696 
9697   // Skip if current node is a reciprocal.
9698   SDValue N0 = N->getOperand(0);
9699   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9700   if (N0CFP && N0CFP->isExactlyValue(1.0))
9701     return SDValue();
9702 
9703   // Exit early if the target does not want this transform or if there can't
9704   // possibly be enough uses of the divisor to make the transform worthwhile.
9705   SDValue N1 = N->getOperand(1);
9706   unsigned MinUses = TLI.combineRepeatedFPDivisors();
9707   if (!MinUses || N1->use_size() < MinUses)
9708     return SDValue();
9709 
9710   // Find all FDIV users of the same divisor.
9711   // Use a set because duplicates may be present in the user list.
9712   SetVector<SDNode *> Users;
9713   for (auto *U : N1->uses()) {
9714     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
9715       // This division is eligible for optimization only if global unsafe math
9716       // is enabled or if this division allows reciprocal formation.
9717       if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
9718         Users.insert(U);
9719     }
9720   }
9721 
9722   // Now that we have the actual number of divisor uses, make sure it meets
9723   // the minimum threshold specified by the target.
9724   if (Users.size() < MinUses)
9725     return SDValue();
9726 
9727   EVT VT = N->getValueType(0);
9728   SDLoc DL(N);
9729   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
9730   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
9731 
9732   // Dividend / Divisor -> Dividend * Reciprocal
9733   for (auto *U : Users) {
9734     SDValue Dividend = U->getOperand(0);
9735     if (Dividend != FPOne) {
9736       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
9737                                     Reciprocal, Flags);
9738       CombineTo(U, NewNode);
9739     } else if (U != Reciprocal.getNode()) {
9740       // In the absence of fast-math-flags, this user node is always the
9741       // same node as Reciprocal, but with FMF they may be different nodes.
9742       CombineTo(U, Reciprocal);
9743     }
9744   }
9745   return SDValue(N, 0);  // N was replaced.
9746 }
9747 
9748 SDValue DAGCombiner::visitFDIV(SDNode *N) {
9749   SDValue N0 = N->getOperand(0);
9750   SDValue N1 = N->getOperand(1);
9751   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9752   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9753   EVT VT = N->getValueType(0);
9754   SDLoc DL(N);
9755   const TargetOptions &Options = DAG.getTarget().Options;
9756   SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9757 
9758   // fold vector ops
9759   if (VT.isVector())
9760     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9761       return FoldedVOp;
9762 
9763   // fold (fdiv c1, c2) -> c1/c2
9764   if (N0CFP && N1CFP)
9765     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
9766 
9767   if (SDValue NewSel = foldBinOpIntoSelect(N))
9768     return NewSel;
9769 
9770   if (Options.UnsafeFPMath) {
9771     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
9772     if (N1CFP) {
9773       // Compute the reciprocal 1.0 / c2.
9774       const APFloat &N1APF = N1CFP->getValueAPF();
9775       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
9776       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
9777       // Only do the transform if the reciprocal is a legal fp immediate that
9778       // isn't too nasty (eg NaN, denormal, ...).
9779       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
9780           (!LegalOperations ||
9781            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
9782            // backend)... we should handle this gracefully after Legalize.
9783            // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
9784            TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
9785            TLI.isFPImmLegal(Recip, VT)))
9786         return DAG.getNode(ISD::FMUL, DL, VT, N0,
9787                            DAG.getConstantFP(Recip, DL, VT), Flags);
9788     }
9789 
9790     // If this FDIV is part of a reciprocal square root, it may be folded
9791     // into a target-specific square root estimate instruction.
9792     if (N1.getOpcode() == ISD::FSQRT) {
9793       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
9794         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9795       }
9796     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
9797                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9798       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
9799                                           Flags)) {
9800         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
9801         AddToWorklist(RV.getNode());
9802         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9803       }
9804     } else if (N1.getOpcode() == ISD::FP_ROUND &&
9805                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9806       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
9807                                           Flags)) {
9808         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
9809         AddToWorklist(RV.getNode());
9810         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9811       }
9812     } else if (N1.getOpcode() == ISD::FMUL) {
9813       // Look through an FMUL. Even though this won't remove the FDIV directly,
9814       // it's still worthwhile to get rid of the FSQRT if possible.
9815       SDValue SqrtOp;
9816       SDValue OtherOp;
9817       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9818         SqrtOp = N1.getOperand(0);
9819         OtherOp = N1.getOperand(1);
9820       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
9821         SqrtOp = N1.getOperand(1);
9822         OtherOp = N1.getOperand(0);
9823       }
9824       if (SqrtOp.getNode()) {
9825         // We found a FSQRT, so try to make this fold:
9826         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
9827         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
9828           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
9829           AddToWorklist(RV.getNode());
9830           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9831         }
9832       }
9833     }
9834 
9835     // Fold into a reciprocal estimate and multiply instead of a real divide.
9836     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
9837       AddToWorklist(RV.getNode());
9838       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9839     }
9840   }
9841 
9842   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
9843   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9844     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9845       // Both can be negated for free, check to see if at least one is cheaper
9846       // negated.
9847       if (LHSNeg == 2 || RHSNeg == 2)
9848         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
9849                            GetNegatedExpression(N0, DAG, LegalOperations),
9850                            GetNegatedExpression(N1, DAG, LegalOperations),
9851                            Flags);
9852     }
9853   }
9854 
9855   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
9856     return CombineRepeatedDivisors;
9857 
9858   return SDValue();
9859 }
9860 
9861 SDValue DAGCombiner::visitFREM(SDNode *N) {
9862   SDValue N0 = N->getOperand(0);
9863   SDValue N1 = N->getOperand(1);
9864   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9865   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9866   EVT VT = N->getValueType(0);
9867 
9868   // fold (frem c1, c2) -> fmod(c1,c2)
9869   if (N0CFP && N1CFP)
9870     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
9871                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
9872 
9873   if (SDValue NewSel = foldBinOpIntoSelect(N))
9874     return NewSel;
9875 
9876   return SDValue();
9877 }
9878 
9879 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
9880   if (!DAG.getTarget().Options.UnsafeFPMath)
9881     return SDValue();
9882 
9883   SDValue N0 = N->getOperand(0);
9884   if (TLI.isFsqrtCheap(N0, DAG))
9885     return SDValue();
9886 
9887   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
9888   // For now, create a Flags object for use with all unsafe math transforms.
9889   SDNodeFlags Flags;
9890   Flags.setUnsafeAlgebra(true);
9891   return buildSqrtEstimate(N0, &Flags);
9892 }
9893 
9894 /// copysign(x, fp_extend(y)) -> copysign(x, y)
9895 /// copysign(x, fp_round(y)) -> copysign(x, y)
9896 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
9897   SDValue N1 = N->getOperand(1);
9898   if ((N1.getOpcode() == ISD::FP_EXTEND ||
9899        N1.getOpcode() == ISD::FP_ROUND)) {
9900     // Do not optimize out type conversion of f128 type yet.
9901     // For some targets like x86_64, configuration is changed to keep one f128
9902     // value in one SSE register, but instruction selection cannot handle
9903     // FCOPYSIGN on SSE registers yet.
9904     EVT N1VT = N1->getValueType(0);
9905     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
9906     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
9907   }
9908   return false;
9909 }
9910 
9911 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
9912   SDValue N0 = N->getOperand(0);
9913   SDValue N1 = N->getOperand(1);
9914   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9915   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9916   EVT VT = N->getValueType(0);
9917 
9918   if (N0CFP && N1CFP) // Constant fold
9919     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
9920 
9921   if (N1CFP) {
9922     const APFloat &V = N1CFP->getValueAPF();
9923     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
9924     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
9925     if (!V.isNegative()) {
9926       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
9927         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9928     } else {
9929       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9930         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9931                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
9932     }
9933   }
9934 
9935   // copysign(fabs(x), y) -> copysign(x, y)
9936   // copysign(fneg(x), y) -> copysign(x, y)
9937   // copysign(copysign(x,z), y) -> copysign(x, y)
9938   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
9939       N0.getOpcode() == ISD::FCOPYSIGN)
9940     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
9941 
9942   // copysign(x, abs(y)) -> abs(x)
9943   if (N1.getOpcode() == ISD::FABS)
9944     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9945 
9946   // copysign(x, copysign(y,z)) -> copysign(x, z)
9947   if (N1.getOpcode() == ISD::FCOPYSIGN)
9948     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
9949 
9950   // copysign(x, fp_extend(y)) -> copysign(x, y)
9951   // copysign(x, fp_round(y)) -> copysign(x, y)
9952   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
9953     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
9954 
9955   return SDValue();
9956 }
9957 
9958 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
9959   SDValue N0 = N->getOperand(0);
9960   EVT VT = N->getValueType(0);
9961   EVT OpVT = N0.getValueType();
9962 
9963   // fold (sint_to_fp c1) -> c1fp
9964   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9965       // ...but only if the target supports immediate floating-point values
9966       (!LegalOperations ||
9967        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9968     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9969 
9970   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
9971   // but UINT_TO_FP is legal on this target, try to convert.
9972   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
9973       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
9974     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
9975     if (DAG.SignBitIsZero(N0))
9976       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9977   }
9978 
9979   // The next optimizations are desirable only if SELECT_CC can be lowered.
9980   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9981     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
9982     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
9983         !VT.isVector() &&
9984         (!LegalOperations ||
9985          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9986       SDLoc DL(N);
9987       SDValue Ops[] =
9988         { N0.getOperand(0), N0.getOperand(1),
9989           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9990           N0.getOperand(2) };
9991       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9992     }
9993 
9994     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
9995     //      (select_cc x, y, 1.0, 0.0,, cc)
9996     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
9997         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
9998         (!LegalOperations ||
9999          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10000       SDLoc DL(N);
10001       SDValue Ops[] =
10002         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10003           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10004           N0.getOperand(0).getOperand(2) };
10005       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10006     }
10007   }
10008 
10009   return SDValue();
10010 }
10011 
10012 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10013   SDValue N0 = N->getOperand(0);
10014   EVT VT = N->getValueType(0);
10015   EVT OpVT = N0.getValueType();
10016 
10017   // fold (uint_to_fp c1) -> c1fp
10018   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10019       // ...but only if the target supports immediate floating-point values
10020       (!LegalOperations ||
10021        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10022     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10023 
10024   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10025   // but SINT_TO_FP is legal on this target, try to convert.
10026   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10027       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10028     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10029     if (DAG.SignBitIsZero(N0))
10030       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10031   }
10032 
10033   // The next optimizations are desirable only if SELECT_CC can be lowered.
10034   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10035     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10036 
10037     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10038         (!LegalOperations ||
10039          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10040       SDLoc DL(N);
10041       SDValue Ops[] =
10042         { N0.getOperand(0), N0.getOperand(1),
10043           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10044           N0.getOperand(2) };
10045       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10046     }
10047   }
10048 
10049   return SDValue();
10050 }
10051 
10052 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10053 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10054   SDValue N0 = N->getOperand(0);
10055   EVT VT = N->getValueType(0);
10056 
10057   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10058     return SDValue();
10059 
10060   SDValue Src = N0.getOperand(0);
10061   EVT SrcVT = Src.getValueType();
10062   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10063   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10064 
10065   // We can safely assume the conversion won't overflow the output range,
10066   // because (for example) (uint8_t)18293.f is undefined behavior.
10067 
10068   // Since we can assume the conversion won't overflow, our decision as to
10069   // whether the input will fit in the float should depend on the minimum
10070   // of the input range and output range.
10071 
10072   // This means this is also safe for a signed input and unsigned output, since
10073   // a negative input would lead to undefined behavior.
10074   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10075   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10076   unsigned ActualSize = std::min(InputSize, OutputSize);
10077   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10078 
10079   // We can only fold away the float conversion if the input range can be
10080   // represented exactly in the float range.
10081   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10082     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10083       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10084                                                        : ISD::ZERO_EXTEND;
10085       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10086     }
10087     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10088       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10089     return DAG.getBitcast(VT, Src);
10090   }
10091   return SDValue();
10092 }
10093 
10094 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10095   SDValue N0 = N->getOperand(0);
10096   EVT VT = N->getValueType(0);
10097 
10098   // fold (fp_to_sint c1fp) -> c1
10099   if (isConstantFPBuildVectorOrConstantFP(N0))
10100     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10101 
10102   return FoldIntToFPToInt(N, DAG);
10103 }
10104 
10105 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10106   SDValue N0 = N->getOperand(0);
10107   EVT VT = N->getValueType(0);
10108 
10109   // fold (fp_to_uint c1fp) -> c1
10110   if (isConstantFPBuildVectorOrConstantFP(N0))
10111     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10112 
10113   return FoldIntToFPToInt(N, DAG);
10114 }
10115 
10116 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10117   SDValue N0 = N->getOperand(0);
10118   SDValue N1 = N->getOperand(1);
10119   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10120   EVT VT = N->getValueType(0);
10121 
10122   // fold (fp_round c1fp) -> c1fp
10123   if (N0CFP)
10124     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10125 
10126   // fold (fp_round (fp_extend x)) -> x
10127   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10128     return N0.getOperand(0);
10129 
10130   // fold (fp_round (fp_round x)) -> (fp_round x)
10131   if (N0.getOpcode() == ISD::FP_ROUND) {
10132     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10133     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10134 
10135     // Skip this folding if it results in an fp_round from f80 to f16.
10136     //
10137     // f80 to f16 always generates an expensive (and as yet, unimplemented)
10138     // libcall to __truncxfhf2 instead of selecting native f16 conversion
10139     // instructions from f32 or f64.  Moreover, the first (value-preserving)
10140     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
10141     // x86.
10142     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10143       return SDValue();
10144 
10145     // If the first fp_round isn't a value preserving truncation, it might
10146     // introduce a tie in the second fp_round, that wouldn't occur in the
10147     // single-step fp_round we want to fold to.
10148     // In other words, double rounding isn't the same as rounding.
10149     // Also, this is a value preserving truncation iff both fp_round's are.
10150     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10151       SDLoc DL(N);
10152       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10153                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10154     }
10155   }
10156 
10157   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10158   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10159     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10160                               N0.getOperand(0), N1);
10161     AddToWorklist(Tmp.getNode());
10162     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10163                        Tmp, N0.getOperand(1));
10164   }
10165 
10166   return SDValue();
10167 }
10168 
10169 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10170   SDValue N0 = N->getOperand(0);
10171   EVT VT = N->getValueType(0);
10172   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10173   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10174 
10175   // fold (fp_round_inreg c1fp) -> c1fp
10176   if (N0CFP && isTypeLegal(EVT)) {
10177     SDLoc DL(N);
10178     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10179     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10180   }
10181 
10182   return SDValue();
10183 }
10184 
10185 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10186   SDValue N0 = N->getOperand(0);
10187   EVT VT = N->getValueType(0);
10188 
10189   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
10190   if (N->hasOneUse() &&
10191       N->use_begin()->getOpcode() == ISD::FP_ROUND)
10192     return SDValue();
10193 
10194   // fold (fp_extend c1fp) -> c1fp
10195   if (isConstantFPBuildVectorOrConstantFP(N0))
10196     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10197 
10198   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10199   if (N0.getOpcode() == ISD::FP16_TO_FP &&
10200       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10201     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10202 
10203   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10204   // value of X.
10205   if (N0.getOpcode() == ISD::FP_ROUND
10206       && N0.getConstantOperandVal(1) == 1) {
10207     SDValue In = N0.getOperand(0);
10208     if (In.getValueType() == VT) return In;
10209     if (VT.bitsLT(In.getValueType()))
10210       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10211                          In, N0.getOperand(1));
10212     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10213   }
10214 
10215   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10216   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10217        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10218     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10219     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10220                                      LN0->getChain(),
10221                                      LN0->getBasePtr(), N0.getValueType(),
10222                                      LN0->getMemOperand());
10223     CombineTo(N, ExtLoad);
10224     CombineTo(N0.getNode(),
10225               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10226                           N0.getValueType(), ExtLoad,
10227                           DAG.getIntPtrConstant(1, SDLoc(N0))),
10228               ExtLoad.getValue(1));
10229     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10230   }
10231 
10232   return SDValue();
10233 }
10234 
10235 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10236   SDValue N0 = N->getOperand(0);
10237   EVT VT = N->getValueType(0);
10238 
10239   // fold (fceil c1) -> fceil(c1)
10240   if (isConstantFPBuildVectorOrConstantFP(N0))
10241     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10242 
10243   return SDValue();
10244 }
10245 
10246 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10247   SDValue N0 = N->getOperand(0);
10248   EVT VT = N->getValueType(0);
10249 
10250   // fold (ftrunc c1) -> ftrunc(c1)
10251   if (isConstantFPBuildVectorOrConstantFP(N0))
10252     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10253 
10254   return SDValue();
10255 }
10256 
10257 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10258   SDValue N0 = N->getOperand(0);
10259   EVT VT = N->getValueType(0);
10260 
10261   // fold (ffloor c1) -> ffloor(c1)
10262   if (isConstantFPBuildVectorOrConstantFP(N0))
10263     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10264 
10265   return SDValue();
10266 }
10267 
10268 // FIXME: FNEG and FABS have a lot in common; refactor.
10269 SDValue DAGCombiner::visitFNEG(SDNode *N) {
10270   SDValue N0 = N->getOperand(0);
10271   EVT VT = N->getValueType(0);
10272 
10273   // Constant fold FNEG.
10274   if (isConstantFPBuildVectorOrConstantFP(N0))
10275     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
10276 
10277   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
10278                          &DAG.getTarget().Options))
10279     return GetNegatedExpression(N0, DAG, LegalOperations);
10280 
10281   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
10282   // constant pool values.
10283   if (!TLI.isFNegFree(VT) &&
10284       N0.getOpcode() == ISD::BITCAST &&
10285       N0.getNode()->hasOneUse()) {
10286     SDValue Int = N0.getOperand(0);
10287     EVT IntVT = Int.getValueType();
10288     if (IntVT.isInteger() && !IntVT.isVector()) {
10289       APInt SignMask;
10290       if (N0.getValueType().isVector()) {
10291         // For a vector, get a mask such as 0x80... per scalar element
10292         // and splat it.
10293         SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
10294         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10295       } else {
10296         // For a scalar, just generate 0x80...
10297         SignMask = APInt::getSignBit(IntVT.getSizeInBits());
10298       }
10299       SDLoc DL0(N0);
10300       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
10301                         DAG.getConstant(SignMask, DL0, IntVT));
10302       AddToWorklist(Int.getNode());
10303       return DAG.getBitcast(VT, Int);
10304     }
10305   }
10306 
10307   // (fneg (fmul c, x)) -> (fmul -c, x)
10308   if (N0.getOpcode() == ISD::FMUL &&
10309       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
10310     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
10311     if (CFP1) {
10312       APFloat CVal = CFP1->getValueAPF();
10313       CVal.changeSign();
10314       if (Level >= AfterLegalizeDAG &&
10315           (TLI.isFPImmLegal(CVal, VT) ||
10316            TLI.isOperationLegal(ISD::ConstantFP, VT)))
10317         return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
10318                            DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10319                                        N0.getOperand(1)),
10320                            &cast<BinaryWithFlagsSDNode>(N0)->Flags);
10321     }
10322   }
10323 
10324   return SDValue();
10325 }
10326 
10327 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10328   SDValue N0 = N->getOperand(0);
10329   SDValue N1 = N->getOperand(1);
10330   EVT VT = N->getValueType(0);
10331   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10332   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10333 
10334   if (N0CFP && N1CFP) {
10335     const APFloat &C0 = N0CFP->getValueAPF();
10336     const APFloat &C1 = N1CFP->getValueAPF();
10337     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10338   }
10339 
10340   // Canonicalize to constant on RHS.
10341   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10342      !isConstantFPBuildVectorOrConstantFP(N1))
10343     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10344 
10345   return SDValue();
10346 }
10347 
10348 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10349   SDValue N0 = N->getOperand(0);
10350   SDValue N1 = N->getOperand(1);
10351   EVT VT = N->getValueType(0);
10352   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10353   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10354 
10355   if (N0CFP && N1CFP) {
10356     const APFloat &C0 = N0CFP->getValueAPF();
10357     const APFloat &C1 = N1CFP->getValueAPF();
10358     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10359   }
10360 
10361   // Canonicalize to constant on RHS.
10362   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10363      !isConstantFPBuildVectorOrConstantFP(N1))
10364     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10365 
10366   return SDValue();
10367 }
10368 
10369 SDValue DAGCombiner::visitFABS(SDNode *N) {
10370   SDValue N0 = N->getOperand(0);
10371   EVT VT = N->getValueType(0);
10372 
10373   // fold (fabs c1) -> fabs(c1)
10374   if (isConstantFPBuildVectorOrConstantFP(N0))
10375     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10376 
10377   // fold (fabs (fabs x)) -> (fabs x)
10378   if (N0.getOpcode() == ISD::FABS)
10379     return N->getOperand(0);
10380 
10381   // fold (fabs (fneg x)) -> (fabs x)
10382   // fold (fabs (fcopysign x, y)) -> (fabs x)
10383   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10384     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10385 
10386   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10387   // constant pool values.
10388   if (!TLI.isFAbsFree(VT) &&
10389       N0.getOpcode() == ISD::BITCAST &&
10390       N0.getNode()->hasOneUse()) {
10391     SDValue Int = N0.getOperand(0);
10392     EVT IntVT = Int.getValueType();
10393     if (IntVT.isInteger() && !IntVT.isVector()) {
10394       APInt SignMask;
10395       if (N0.getValueType().isVector()) {
10396         // For a vector, get a mask such as 0x7f... per scalar element
10397         // and splat it.
10398         SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
10399         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10400       } else {
10401         // For a scalar, just generate 0x7f...
10402         SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
10403       }
10404       SDLoc DL(N0);
10405       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10406                         DAG.getConstant(SignMask, DL, IntVT));
10407       AddToWorklist(Int.getNode());
10408       return DAG.getBitcast(N->getValueType(0), Int);
10409     }
10410   }
10411 
10412   return SDValue();
10413 }
10414 
10415 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
10416   SDValue Chain = N->getOperand(0);
10417   SDValue N1 = N->getOperand(1);
10418   SDValue N2 = N->getOperand(2);
10419 
10420   // If N is a constant we could fold this into a fallthrough or unconditional
10421   // branch. However that doesn't happen very often in normal code, because
10422   // Instcombine/SimplifyCFG should have handled the available opportunities.
10423   // If we did this folding here, it would be necessary to update the
10424   // MachineBasicBlock CFG, which is awkward.
10425 
10426   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
10427   // on the target.
10428   if (N1.getOpcode() == ISD::SETCC &&
10429       TLI.isOperationLegalOrCustom(ISD::BR_CC,
10430                                    N1.getOperand(0).getValueType())) {
10431     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10432                        Chain, N1.getOperand(2),
10433                        N1.getOperand(0), N1.getOperand(1), N2);
10434   }
10435 
10436   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
10437       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
10438        (N1.getOperand(0).hasOneUse() &&
10439         N1.getOperand(0).getOpcode() == ISD::SRL))) {
10440     SDNode *Trunc = nullptr;
10441     if (N1.getOpcode() == ISD::TRUNCATE) {
10442       // Look pass the truncate.
10443       Trunc = N1.getNode();
10444       N1 = N1.getOperand(0);
10445     }
10446 
10447     // Match this pattern so that we can generate simpler code:
10448     //
10449     //   %a = ...
10450     //   %b = and i32 %a, 2
10451     //   %c = srl i32 %b, 1
10452     //   brcond i32 %c ...
10453     //
10454     // into
10455     //
10456     //   %a = ...
10457     //   %b = and i32 %a, 2
10458     //   %c = setcc eq %b, 0
10459     //   brcond %c ...
10460     //
10461     // This applies only when the AND constant value has one bit set and the
10462     // SRL constant is equal to the log2 of the AND constant. The back-end is
10463     // smart enough to convert the result into a TEST/JMP sequence.
10464     SDValue Op0 = N1.getOperand(0);
10465     SDValue Op1 = N1.getOperand(1);
10466 
10467     if (Op0.getOpcode() == ISD::AND &&
10468         Op1.getOpcode() == ISD::Constant) {
10469       SDValue AndOp1 = Op0.getOperand(1);
10470 
10471       if (AndOp1.getOpcode() == ISD::Constant) {
10472         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
10473 
10474         if (AndConst.isPowerOf2() &&
10475             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
10476           SDLoc DL(N);
10477           SDValue SetCC =
10478             DAG.getSetCC(DL,
10479                          getSetCCResultType(Op0.getValueType()),
10480                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
10481                          ISD::SETNE);
10482 
10483           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
10484                                           MVT::Other, Chain, SetCC, N2);
10485           // Don't add the new BRCond into the worklist or else SimplifySelectCC
10486           // will convert it back to (X & C1) >> C2.
10487           CombineTo(N, NewBRCond, false);
10488           // Truncate is dead.
10489           if (Trunc)
10490             deleteAndRecombine(Trunc);
10491           // Replace the uses of SRL with SETCC
10492           WorklistRemover DeadNodes(*this);
10493           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10494           deleteAndRecombine(N1.getNode());
10495           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10496         }
10497       }
10498     }
10499 
10500     if (Trunc)
10501       // Restore N1 if the above transformation doesn't match.
10502       N1 = N->getOperand(1);
10503   }
10504 
10505   // Transform br(xor(x, y)) -> br(x != y)
10506   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
10507   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
10508     SDNode *TheXor = N1.getNode();
10509     SDValue Op0 = TheXor->getOperand(0);
10510     SDValue Op1 = TheXor->getOperand(1);
10511     if (Op0.getOpcode() == Op1.getOpcode()) {
10512       // Avoid missing important xor optimizations.
10513       if (SDValue Tmp = visitXOR(TheXor)) {
10514         if (Tmp.getNode() != TheXor) {
10515           DEBUG(dbgs() << "\nReplacing.8 ";
10516                 TheXor->dump(&DAG);
10517                 dbgs() << "\nWith: ";
10518                 Tmp.getNode()->dump(&DAG);
10519                 dbgs() << '\n');
10520           WorklistRemover DeadNodes(*this);
10521           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
10522           deleteAndRecombine(TheXor);
10523           return DAG.getNode(ISD::BRCOND, SDLoc(N),
10524                              MVT::Other, Chain, Tmp, N2);
10525         }
10526 
10527         // visitXOR has changed XOR's operands or replaced the XOR completely,
10528         // bail out.
10529         return SDValue(N, 0);
10530       }
10531     }
10532 
10533     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
10534       bool Equal = false;
10535       if (isOneConstant(Op0) && Op0.hasOneUse() &&
10536           Op0.getOpcode() == ISD::XOR) {
10537         TheXor = Op0.getNode();
10538         Equal = true;
10539       }
10540 
10541       EVT SetCCVT = N1.getValueType();
10542       if (LegalTypes)
10543         SetCCVT = getSetCCResultType(SetCCVT);
10544       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
10545                                    SetCCVT,
10546                                    Op0, Op1,
10547                                    Equal ? ISD::SETEQ : ISD::SETNE);
10548       // Replace the uses of XOR with SETCC
10549       WorklistRemover DeadNodes(*this);
10550       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
10551       deleteAndRecombine(N1.getNode());
10552       return DAG.getNode(ISD::BRCOND, SDLoc(N),
10553                          MVT::Other, Chain, SetCC, N2);
10554     }
10555   }
10556 
10557   return SDValue();
10558 }
10559 
10560 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10561 //
10562 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10563   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10564   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10565 
10566   // If N is a constant we could fold this into a fallthrough or unconditional
10567   // branch. However that doesn't happen very often in normal code, because
10568   // Instcombine/SimplifyCFG should have handled the available opportunities.
10569   // If we did this folding here, it would be necessary to update the
10570   // MachineBasicBlock CFG, which is awkward.
10571 
10572   // Use SimplifySetCC to simplify SETCC's.
10573   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10574                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10575                                false);
10576   if (Simp.getNode()) AddToWorklist(Simp.getNode());
10577 
10578   // fold to a simpler setcc
10579   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
10580     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10581                        N->getOperand(0), Simp.getOperand(2),
10582                        Simp.getOperand(0), Simp.getOperand(1),
10583                        N->getOperand(4));
10584 
10585   return SDValue();
10586 }
10587 
10588 /// Return true if 'Use' is a load or a store that uses N as its base pointer
10589 /// and that N may be folded in the load / store addressing mode.
10590 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
10591                                     SelectionDAG &DAG,
10592                                     const TargetLowering &TLI) {
10593   EVT VT;
10594   unsigned AS;
10595 
10596   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
10597     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
10598       return false;
10599     VT = LD->getMemoryVT();
10600     AS = LD->getAddressSpace();
10601   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
10602     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
10603       return false;
10604     VT = ST->getMemoryVT();
10605     AS = ST->getAddressSpace();
10606   } else
10607     return false;
10608 
10609   TargetLowering::AddrMode AM;
10610   if (N->getOpcode() == ISD::ADD) {
10611     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10612     if (Offset)
10613       // [reg +/- imm]
10614       AM.BaseOffs = Offset->getSExtValue();
10615     else
10616       // [reg +/- reg]
10617       AM.Scale = 1;
10618   } else if (N->getOpcode() == ISD::SUB) {
10619     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10620     if (Offset)
10621       // [reg +/- imm]
10622       AM.BaseOffs = -Offset->getSExtValue();
10623     else
10624       // [reg +/- reg]
10625       AM.Scale = 1;
10626   } else
10627     return false;
10628 
10629   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
10630                                    VT.getTypeForEVT(*DAG.getContext()), AS);
10631 }
10632 
10633 /// Try turning a load/store into a pre-indexed load/store when the base
10634 /// pointer is an add or subtract and it has other uses besides the load/store.
10635 /// After the transformation, the new indexed load/store has effectively folded
10636 /// the add/subtract in and all of its other uses are redirected to the
10637 /// new load/store.
10638 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
10639   if (Level < AfterLegalizeDAG)
10640     return false;
10641 
10642   bool isLoad = true;
10643   SDValue Ptr;
10644   EVT VT;
10645   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
10646     if (LD->isIndexed())
10647       return false;
10648     VT = LD->getMemoryVT();
10649     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
10650         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
10651       return false;
10652     Ptr = LD->getBasePtr();
10653   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
10654     if (ST->isIndexed())
10655       return false;
10656     VT = ST->getMemoryVT();
10657     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
10658         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
10659       return false;
10660     Ptr = ST->getBasePtr();
10661     isLoad = false;
10662   } else {
10663     return false;
10664   }
10665 
10666   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
10667   // out.  There is no reason to make this a preinc/predec.
10668   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
10669       Ptr.getNode()->hasOneUse())
10670     return false;
10671 
10672   // Ask the target to do addressing mode selection.
10673   SDValue BasePtr;
10674   SDValue Offset;
10675   ISD::MemIndexedMode AM = ISD::UNINDEXED;
10676   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
10677     return false;
10678 
10679   // Backends without true r+i pre-indexed forms may need to pass a
10680   // constant base with a variable offset so that constant coercion
10681   // will work with the patterns in canonical form.
10682   bool Swapped = false;
10683   if (isa<ConstantSDNode>(BasePtr)) {
10684     std::swap(BasePtr, Offset);
10685     Swapped = true;
10686   }
10687 
10688   // Don't create a indexed load / store with zero offset.
10689   if (isNullConstant(Offset))
10690     return false;
10691 
10692   // Try turning it into a pre-indexed load / store except when:
10693   // 1) The new base ptr is a frame index.
10694   // 2) If N is a store and the new base ptr is either the same as or is a
10695   //    predecessor of the value being stored.
10696   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
10697   //    that would create a cycle.
10698   // 4) All uses are load / store ops that use it as old base ptr.
10699 
10700   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
10701   // (plus the implicit offset) to a register to preinc anyway.
10702   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10703     return false;
10704 
10705   // Check #2.
10706   if (!isLoad) {
10707     SDValue Val = cast<StoreSDNode>(N)->getValue();
10708     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
10709       return false;
10710   }
10711 
10712   // Caches for hasPredecessorHelper.
10713   SmallPtrSet<const SDNode *, 32> Visited;
10714   SmallVector<const SDNode *, 16> Worklist;
10715   Worklist.push_back(N);
10716 
10717   // If the offset is a constant, there may be other adds of constants that
10718   // can be folded with this one. We should do this to avoid having to keep
10719   // a copy of the original base pointer.
10720   SmallVector<SDNode *, 16> OtherUses;
10721   if (isa<ConstantSDNode>(Offset))
10722     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
10723                               UE = BasePtr.getNode()->use_end();
10724          UI != UE; ++UI) {
10725       SDUse &Use = UI.getUse();
10726       // Skip the use that is Ptr and uses of other results from BasePtr's
10727       // node (important for nodes that return multiple results).
10728       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
10729         continue;
10730 
10731       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
10732         continue;
10733 
10734       if (Use.getUser()->getOpcode() != ISD::ADD &&
10735           Use.getUser()->getOpcode() != ISD::SUB) {
10736         OtherUses.clear();
10737         break;
10738       }
10739 
10740       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
10741       if (!isa<ConstantSDNode>(Op1)) {
10742         OtherUses.clear();
10743         break;
10744       }
10745 
10746       // FIXME: In some cases, we can be smarter about this.
10747       if (Op1.getValueType() != Offset.getValueType()) {
10748         OtherUses.clear();
10749         break;
10750       }
10751 
10752       OtherUses.push_back(Use.getUser());
10753     }
10754 
10755   if (Swapped)
10756     std::swap(BasePtr, Offset);
10757 
10758   // Now check for #3 and #4.
10759   bool RealUse = false;
10760 
10761   for (SDNode *Use : Ptr.getNode()->uses()) {
10762     if (Use == N)
10763       continue;
10764     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
10765       return false;
10766 
10767     // If Ptr may be folded in addressing mode of other use, then it's
10768     // not profitable to do this transformation.
10769     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
10770       RealUse = true;
10771   }
10772 
10773   if (!RealUse)
10774     return false;
10775 
10776   SDValue Result;
10777   if (isLoad)
10778     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
10779                                 BasePtr, Offset, AM);
10780   else
10781     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
10782                                  BasePtr, Offset, AM);
10783   ++PreIndexedNodes;
10784   ++NodesCombined;
10785   DEBUG(dbgs() << "\nReplacing.4 ";
10786         N->dump(&DAG);
10787         dbgs() << "\nWith: ";
10788         Result.getNode()->dump(&DAG);
10789         dbgs() << '\n');
10790   WorklistRemover DeadNodes(*this);
10791   if (isLoad) {
10792     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
10793     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
10794   } else {
10795     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
10796   }
10797 
10798   // Finally, since the node is now dead, remove it from the graph.
10799   deleteAndRecombine(N);
10800 
10801   if (Swapped)
10802     std::swap(BasePtr, Offset);
10803 
10804   // Replace other uses of BasePtr that can be updated to use Ptr
10805   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
10806     unsigned OffsetIdx = 1;
10807     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
10808       OffsetIdx = 0;
10809     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
10810            BasePtr.getNode() && "Expected BasePtr operand");
10811 
10812     // We need to replace ptr0 in the following expression:
10813     //   x0 * offset0 + y0 * ptr0 = t0
10814     // knowing that
10815     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
10816     //
10817     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
10818     // indexed load/store and the expresion that needs to be re-written.
10819     //
10820     // Therefore, we have:
10821     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
10822 
10823     ConstantSDNode *CN =
10824       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
10825     int X0, X1, Y0, Y1;
10826     const APInt &Offset0 = CN->getAPIntValue();
10827     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
10828 
10829     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
10830     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
10831     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
10832     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
10833 
10834     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
10835 
10836     APInt CNV = Offset0;
10837     if (X0 < 0) CNV = -CNV;
10838     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
10839     else CNV = CNV - Offset1;
10840 
10841     SDLoc DL(OtherUses[i]);
10842 
10843     // We can now generate the new expression.
10844     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
10845     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
10846 
10847     SDValue NewUse = DAG.getNode(Opcode,
10848                                  DL,
10849                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
10850     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
10851     deleteAndRecombine(OtherUses[i]);
10852   }
10853 
10854   // Replace the uses of Ptr with uses of the updated base value.
10855   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
10856   deleteAndRecombine(Ptr.getNode());
10857 
10858   return true;
10859 }
10860 
10861 /// Try to combine a load/store with a add/sub of the base pointer node into a
10862 /// post-indexed load/store. The transformation folded the add/subtract into the
10863 /// new indexed load/store effectively and all of its uses are redirected to the
10864 /// new load/store.
10865 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
10866   if (Level < AfterLegalizeDAG)
10867     return false;
10868 
10869   bool isLoad = true;
10870   SDValue Ptr;
10871   EVT VT;
10872   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
10873     if (LD->isIndexed())
10874       return false;
10875     VT = LD->getMemoryVT();
10876     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
10877         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
10878       return false;
10879     Ptr = LD->getBasePtr();
10880   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
10881     if (ST->isIndexed())
10882       return false;
10883     VT = ST->getMemoryVT();
10884     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
10885         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
10886       return false;
10887     Ptr = ST->getBasePtr();
10888     isLoad = false;
10889   } else {
10890     return false;
10891   }
10892 
10893   if (Ptr.getNode()->hasOneUse())
10894     return false;
10895 
10896   for (SDNode *Op : Ptr.getNode()->uses()) {
10897     if (Op == N ||
10898         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
10899       continue;
10900 
10901     SDValue BasePtr;
10902     SDValue Offset;
10903     ISD::MemIndexedMode AM = ISD::UNINDEXED;
10904     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
10905       // Don't create a indexed load / store with zero offset.
10906       if (isNullConstant(Offset))
10907         continue;
10908 
10909       // Try turning it into a post-indexed load / store except when
10910       // 1) All uses are load / store ops that use it as base ptr (and
10911       //    it may be folded as addressing mmode).
10912       // 2) Op must be independent of N, i.e. Op is neither a predecessor
10913       //    nor a successor of N. Otherwise, if Op is folded that would
10914       //    create a cycle.
10915 
10916       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10917         continue;
10918 
10919       // Check for #1.
10920       bool TryNext = false;
10921       for (SDNode *Use : BasePtr.getNode()->uses()) {
10922         if (Use == Ptr.getNode())
10923           continue;
10924 
10925         // If all the uses are load / store addresses, then don't do the
10926         // transformation.
10927         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
10928           bool RealUse = false;
10929           for (SDNode *UseUse : Use->uses()) {
10930             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
10931               RealUse = true;
10932           }
10933 
10934           if (!RealUse) {
10935             TryNext = true;
10936             break;
10937           }
10938         }
10939       }
10940 
10941       if (TryNext)
10942         continue;
10943 
10944       // Check for #2
10945       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
10946         SDValue Result = isLoad
10947           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
10948                                BasePtr, Offset, AM)
10949           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
10950                                 BasePtr, Offset, AM);
10951         ++PostIndexedNodes;
10952         ++NodesCombined;
10953         DEBUG(dbgs() << "\nReplacing.5 ";
10954               N->dump(&DAG);
10955               dbgs() << "\nWith: ";
10956               Result.getNode()->dump(&DAG);
10957               dbgs() << '\n');
10958         WorklistRemover DeadNodes(*this);
10959         if (isLoad) {
10960           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
10961           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
10962         } else {
10963           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
10964         }
10965 
10966         // Finally, since the node is now dead, remove it from the graph.
10967         deleteAndRecombine(N);
10968 
10969         // Replace the uses of Use with uses of the updated base value.
10970         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
10971                                       Result.getValue(isLoad ? 1 : 0));
10972         deleteAndRecombine(Op);
10973         return true;
10974       }
10975     }
10976   }
10977 
10978   return false;
10979 }
10980 
10981 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
10982 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
10983   ISD::MemIndexedMode AM = LD->getAddressingMode();
10984   assert(AM != ISD::UNINDEXED);
10985   SDValue BP = LD->getOperand(1);
10986   SDValue Inc = LD->getOperand(2);
10987 
10988   // Some backends use TargetConstants for load offsets, but don't expect
10989   // TargetConstants in general ADD nodes. We can convert these constants into
10990   // regular Constants (if the constant is not opaque).
10991   assert((Inc.getOpcode() != ISD::TargetConstant ||
10992           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
10993          "Cannot split out indexing using opaque target constants");
10994   if (Inc.getOpcode() == ISD::TargetConstant) {
10995     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
10996     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
10997                           ConstInc->getValueType(0));
10998   }
10999 
11000   unsigned Opc =
11001       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11002   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11003 }
11004 
11005 SDValue DAGCombiner::visitLOAD(SDNode *N) {
11006   LoadSDNode *LD  = cast<LoadSDNode>(N);
11007   SDValue Chain = LD->getChain();
11008   SDValue Ptr   = LD->getBasePtr();
11009 
11010   // If load is not volatile and there are no uses of the loaded value (and
11011   // the updated indexed value in case of indexed loads), change uses of the
11012   // chain value into uses of the chain input (i.e. delete the dead load).
11013   if (!LD->isVolatile()) {
11014     if (N->getValueType(1) == MVT::Other) {
11015       // Unindexed loads.
11016       if (!N->hasAnyUseOfValue(0)) {
11017         // It's not safe to use the two value CombineTo variant here. e.g.
11018         // v1, chain2 = load chain1, loc
11019         // v2, chain3 = load chain2, loc
11020         // v3         = add v2, c
11021         // Now we replace use of chain2 with chain1.  This makes the second load
11022         // isomorphic to the one we are deleting, and thus makes this load live.
11023         DEBUG(dbgs() << "\nReplacing.6 ";
11024               N->dump(&DAG);
11025               dbgs() << "\nWith chain: ";
11026               Chain.getNode()->dump(&DAG);
11027               dbgs() << "\n");
11028         WorklistRemover DeadNodes(*this);
11029         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11030         AddUsersToWorklist(Chain.getNode());
11031         if (N->use_empty())
11032           deleteAndRecombine(N);
11033 
11034         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11035       }
11036     } else {
11037       // Indexed loads.
11038       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
11039 
11040       // If this load has an opaque TargetConstant offset, then we cannot split
11041       // the indexing into an add/sub directly (that TargetConstant may not be
11042       // valid for a different type of node, and we cannot convert an opaque
11043       // target constant into a regular constant).
11044       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
11045                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
11046 
11047       if (!N->hasAnyUseOfValue(0) &&
11048           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
11049         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
11050         SDValue Index;
11051         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
11052           Index = SplitIndexingFromLoad(LD);
11053           // Try to fold the base pointer arithmetic into subsequent loads and
11054           // stores.
11055           AddUsersToWorklist(N);
11056         } else
11057           Index = DAG.getUNDEF(N->getValueType(1));
11058         DEBUG(dbgs() << "\nReplacing.7 ";
11059               N->dump(&DAG);
11060               dbgs() << "\nWith: ";
11061               Undef.getNode()->dump(&DAG);
11062               dbgs() << " and 2 other values\n");
11063         WorklistRemover DeadNodes(*this);
11064         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
11065         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
11066         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
11067         deleteAndRecombine(N);
11068         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11069       }
11070     }
11071   }
11072 
11073   // If this load is directly stored, replace the load value with the stored
11074   // value.
11075   // TODO: Handle store large -> read small portion.
11076   // TODO: Handle TRUNCSTORE/LOADEXT
11077   if (OptLevel != CodeGenOpt::None &&
11078       ISD::isNormalLoad(N) && !LD->isVolatile()) {
11079     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
11080       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
11081       if (PrevST->getBasePtr() == Ptr &&
11082           PrevST->getValue().getValueType() == N->getValueType(0))
11083         return CombineTo(N, PrevST->getOperand(1), Chain);
11084     }
11085   }
11086 
11087   // Try to infer better alignment information than the load already has.
11088   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
11089     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11090       if (Align > LD->getMemOperand()->getBaseAlignment()) {
11091         SDValue NewLoad = DAG.getExtLoad(
11092             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
11093             LD->getPointerInfo(), LD->getMemoryVT(), Align,
11094             LD->getMemOperand()->getFlags(), LD->getAAInfo());
11095         if (NewLoad.getNode() != N)
11096           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
11097       }
11098     }
11099   }
11100 
11101   if (LD->isUnindexed()) {
11102     // Walk up chain skipping non-aliasing memory nodes.
11103     SDValue BetterChain = FindBetterChain(N, Chain);
11104 
11105     // If there is a better chain.
11106     if (Chain != BetterChain) {
11107       SDValue ReplLoad;
11108 
11109       // Replace the chain to void dependency.
11110       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
11111         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
11112                                BetterChain, Ptr, LD->getMemOperand());
11113       } else {
11114         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
11115                                   LD->getValueType(0),
11116                                   BetterChain, Ptr, LD->getMemoryVT(),
11117                                   LD->getMemOperand());
11118       }
11119 
11120       // Create token factor to keep old chain connected.
11121       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11122                                   MVT::Other, Chain, ReplLoad.getValue(1));
11123 
11124       // Make sure the new and old chains are cleaned up.
11125       AddToWorklist(Token.getNode());
11126 
11127       // Replace uses with load result and token factor. Don't add users
11128       // to work list.
11129       return CombineTo(N, ReplLoad.getValue(0), Token, false);
11130     }
11131   }
11132 
11133   // Try transforming N to an indexed load.
11134   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11135     return SDValue(N, 0);
11136 
11137   // Try to slice up N to more direct loads if the slices are mapped to
11138   // different register banks or pairing can take place.
11139   if (SliceUpLoad(N))
11140     return SDValue(N, 0);
11141 
11142   return SDValue();
11143 }
11144 
11145 namespace {
11146 /// \brief Helper structure used to slice a load in smaller loads.
11147 /// Basically a slice is obtained from the following sequence:
11148 /// Origin = load Ty1, Base
11149 /// Shift = srl Ty1 Origin, CstTy Amount
11150 /// Inst = trunc Shift to Ty2
11151 ///
11152 /// Then, it will be rewriten into:
11153 /// Slice = load SliceTy, Base + SliceOffset
11154 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11155 ///
11156 /// SliceTy is deduced from the number of bits that are actually used to
11157 /// build Inst.
11158 struct LoadedSlice {
11159   /// \brief Helper structure used to compute the cost of a slice.
11160   struct Cost {
11161     /// Are we optimizing for code size.
11162     bool ForCodeSize;
11163     /// Various cost.
11164     unsigned Loads;
11165     unsigned Truncates;
11166     unsigned CrossRegisterBanksCopies;
11167     unsigned ZExts;
11168     unsigned Shift;
11169 
11170     Cost(bool ForCodeSize = false)
11171         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
11172           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
11173 
11174     /// \brief Get the cost of one isolated slice.
11175     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11176         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
11177           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
11178       EVT TruncType = LS.Inst->getValueType(0);
11179       EVT LoadedType = LS.getLoadedType();
11180       if (TruncType != LoadedType &&
11181           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11182         ZExts = 1;
11183     }
11184 
11185     /// \brief Account for slicing gain in the current cost.
11186     /// Slicing provide a few gains like removing a shift or a
11187     /// truncate. This method allows to grow the cost of the original
11188     /// load with the gain from this slice.
11189     void addSliceGain(const LoadedSlice &LS) {
11190       // Each slice saves a truncate.
11191       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11192       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11193                               LS.Inst->getValueType(0)))
11194         ++Truncates;
11195       // If there is a shift amount, this slice gets rid of it.
11196       if (LS.Shift)
11197         ++Shift;
11198       // If this slice can merge a cross register bank copy, account for it.
11199       if (LS.canMergeExpensiveCrossRegisterBankCopy())
11200         ++CrossRegisterBanksCopies;
11201     }
11202 
11203     Cost &operator+=(const Cost &RHS) {
11204       Loads += RHS.Loads;
11205       Truncates += RHS.Truncates;
11206       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11207       ZExts += RHS.ZExts;
11208       Shift += RHS.Shift;
11209       return *this;
11210     }
11211 
11212     bool operator==(const Cost &RHS) const {
11213       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11214              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11215              ZExts == RHS.ZExts && Shift == RHS.Shift;
11216     }
11217 
11218     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11219 
11220     bool operator<(const Cost &RHS) const {
11221       // Assume cross register banks copies are as expensive as loads.
11222       // FIXME: Do we want some more target hooks?
11223       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11224       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11225       // Unless we are optimizing for code size, consider the
11226       // expensive operation first.
11227       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11228         return ExpensiveOpsLHS < ExpensiveOpsRHS;
11229       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11230              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11231     }
11232 
11233     bool operator>(const Cost &RHS) const { return RHS < *this; }
11234 
11235     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11236 
11237     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11238   };
11239   // The last instruction that represent the slice. This should be a
11240   // truncate instruction.
11241   SDNode *Inst;
11242   // The original load instruction.
11243   LoadSDNode *Origin;
11244   // The right shift amount in bits from the original load.
11245   unsigned Shift;
11246   // The DAG from which Origin came from.
11247   // This is used to get some contextual information about legal types, etc.
11248   SelectionDAG *DAG;
11249 
11250   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11251               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11252       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11253 
11254   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11255   /// \return Result is \p BitWidth and has used bits set to 1 and
11256   ///         not used bits set to 0.
11257   APInt getUsedBits() const {
11258     // Reproduce the trunc(lshr) sequence:
11259     // - Start from the truncated value.
11260     // - Zero extend to the desired bit width.
11261     // - Shift left.
11262     assert(Origin && "No original load to compare against.");
11263     unsigned BitWidth = Origin->getValueSizeInBits(0);
11264     assert(Inst && "This slice is not bound to an instruction");
11265     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11266            "Extracted slice is bigger than the whole type!");
11267     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11268     UsedBits.setAllBits();
11269     UsedBits = UsedBits.zext(BitWidth);
11270     UsedBits <<= Shift;
11271     return UsedBits;
11272   }
11273 
11274   /// \brief Get the size of the slice to be loaded in bytes.
11275   unsigned getLoadedSize() const {
11276     unsigned SliceSize = getUsedBits().countPopulation();
11277     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11278     return SliceSize / 8;
11279   }
11280 
11281   /// \brief Get the type that will be loaded for this slice.
11282   /// Note: This may not be the final type for the slice.
11283   EVT getLoadedType() const {
11284     assert(DAG && "Missing context");
11285     LLVMContext &Ctxt = *DAG->getContext();
11286     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11287   }
11288 
11289   /// \brief Get the alignment of the load used for this slice.
11290   unsigned getAlignment() const {
11291     unsigned Alignment = Origin->getAlignment();
11292     unsigned Offset = getOffsetFromBase();
11293     if (Offset != 0)
11294       Alignment = MinAlign(Alignment, Alignment + Offset);
11295     return Alignment;
11296   }
11297 
11298   /// \brief Check if this slice can be rewritten with legal operations.
11299   bool isLegal() const {
11300     // An invalid slice is not legal.
11301     if (!Origin || !Inst || !DAG)
11302       return false;
11303 
11304     // Offsets are for indexed load only, we do not handle that.
11305     if (!Origin->getOffset().isUndef())
11306       return false;
11307 
11308     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11309 
11310     // Check that the type is legal.
11311     EVT SliceType = getLoadedType();
11312     if (!TLI.isTypeLegal(SliceType))
11313       return false;
11314 
11315     // Check that the load is legal for this type.
11316     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11317       return false;
11318 
11319     // Check that the offset can be computed.
11320     // 1. Check its type.
11321     EVT PtrType = Origin->getBasePtr().getValueType();
11322     if (PtrType == MVT::Untyped || PtrType.isExtended())
11323       return false;
11324 
11325     // 2. Check that it fits in the immediate.
11326     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11327       return false;
11328 
11329     // 3. Check that the computation is legal.
11330     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11331       return false;
11332 
11333     // Check that the zext is legal if it needs one.
11334     EVT TruncateType = Inst->getValueType(0);
11335     if (TruncateType != SliceType &&
11336         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11337       return false;
11338 
11339     return true;
11340   }
11341 
11342   /// \brief Get the offset in bytes of this slice in the original chunk of
11343   /// bits.
11344   /// \pre DAG != nullptr.
11345   uint64_t getOffsetFromBase() const {
11346     assert(DAG && "Missing context.");
11347     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
11348     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
11349     uint64_t Offset = Shift / 8;
11350     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
11351     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
11352            "The size of the original loaded type is not a multiple of a"
11353            " byte.");
11354     // If Offset is bigger than TySizeInBytes, it means we are loading all
11355     // zeros. This should have been optimized before in the process.
11356     assert(TySizeInBytes > Offset &&
11357            "Invalid shift amount for given loaded size");
11358     if (IsBigEndian)
11359       Offset = TySizeInBytes - Offset - getLoadedSize();
11360     return Offset;
11361   }
11362 
11363   /// \brief Generate the sequence of instructions to load the slice
11364   /// represented by this object and redirect the uses of this slice to
11365   /// this new sequence of instructions.
11366   /// \pre this->Inst && this->Origin are valid Instructions and this
11367   /// object passed the legal check: LoadedSlice::isLegal returned true.
11368   /// \return The last instruction of the sequence used to load the slice.
11369   SDValue loadSlice() const {
11370     assert(Inst && Origin && "Unable to replace a non-existing slice.");
11371     const SDValue &OldBaseAddr = Origin->getBasePtr();
11372     SDValue BaseAddr = OldBaseAddr;
11373     // Get the offset in that chunk of bytes w.r.t. the endianness.
11374     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
11375     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
11376     if (Offset) {
11377       // BaseAddr = BaseAddr + Offset.
11378       EVT ArithType = BaseAddr.getValueType();
11379       SDLoc DL(Origin);
11380       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
11381                               DAG->getConstant(Offset, DL, ArithType));
11382     }
11383 
11384     // Create the type of the loaded slice according to its size.
11385     EVT SliceType = getLoadedType();
11386 
11387     // Create the load for the slice.
11388     SDValue LastInst =
11389         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
11390                      Origin->getPointerInfo().getWithOffset(Offset),
11391                      getAlignment(), Origin->getMemOperand()->getFlags());
11392     // If the final type is not the same as the loaded type, this means that
11393     // we have to pad with zero. Create a zero extend for that.
11394     EVT FinalType = Inst->getValueType(0);
11395     if (SliceType != FinalType)
11396       LastInst =
11397           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
11398     return LastInst;
11399   }
11400 
11401   /// \brief Check if this slice can be merged with an expensive cross register
11402   /// bank copy. E.g.,
11403   /// i = load i32
11404   /// f = bitcast i32 i to float
11405   bool canMergeExpensiveCrossRegisterBankCopy() const {
11406     if (!Inst || !Inst->hasOneUse())
11407       return false;
11408     SDNode *Use = *Inst->use_begin();
11409     if (Use->getOpcode() != ISD::BITCAST)
11410       return false;
11411     assert(DAG && "Missing context");
11412     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11413     EVT ResVT = Use->getValueType(0);
11414     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11415     const TargetRegisterClass *ArgRC =
11416         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11417     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
11418       return false;
11419 
11420     // At this point, we know that we perform a cross-register-bank copy.
11421     // Check if it is expensive.
11422     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11423     // Assume bitcasts are cheap, unless both register classes do not
11424     // explicitly share a common sub class.
11425     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11426       return false;
11427 
11428     // Check if it will be merged with the load.
11429     // 1. Check the alignment constraint.
11430     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11431         ResVT.getTypeForEVT(*DAG->getContext()));
11432 
11433     if (RequiredAlignment > getAlignment())
11434       return false;
11435 
11436     // 2. Check that the load is a legal operation for that type.
11437     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
11438       return false;
11439 
11440     // 3. Check that we do not have a zext in the way.
11441     if (Inst->getValueType(0) != getLoadedType())
11442       return false;
11443 
11444     return true;
11445   }
11446 };
11447 }
11448 
11449 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11450 /// \p UsedBits looks like 0..0 1..1 0..0.
11451 static bool areUsedBitsDense(const APInt &UsedBits) {
11452   // If all the bits are one, this is dense!
11453   if (UsedBits.isAllOnesValue())
11454     return true;
11455 
11456   // Get rid of the unused bits on the right.
11457   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11458   // Get rid of the unused bits on the left.
11459   if (NarrowedUsedBits.countLeadingZeros())
11460     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11461   // Check that the chunk of bits is completely used.
11462   return NarrowedUsedBits.isAllOnesValue();
11463 }
11464 
11465 /// \brief Check whether or not \p First and \p Second are next to each other
11466 /// in memory. This means that there is no hole between the bits loaded
11467 /// by \p First and the bits loaded by \p Second.
11468 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11469                                      const LoadedSlice &Second) {
11470   assert(First.Origin == Second.Origin && First.Origin &&
11471          "Unable to match different memory origins.");
11472   APInt UsedBits = First.getUsedBits();
11473   assert((UsedBits & Second.getUsedBits()) == 0 &&
11474          "Slices are not supposed to overlap.");
11475   UsedBits |= Second.getUsedBits();
11476   return areUsedBitsDense(UsedBits);
11477 }
11478 
11479 /// \brief Adjust the \p GlobalLSCost according to the target
11480 /// paring capabilities and the layout of the slices.
11481 /// \pre \p GlobalLSCost should account for at least as many loads as
11482 /// there is in the slices in \p LoadedSlices.
11483 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11484                                  LoadedSlice::Cost &GlobalLSCost) {
11485   unsigned NumberOfSlices = LoadedSlices.size();
11486   // If there is less than 2 elements, no pairing is possible.
11487   if (NumberOfSlices < 2)
11488     return;
11489 
11490   // Sort the slices so that elements that are likely to be next to each
11491   // other in memory are next to each other in the list.
11492   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
11493             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
11494     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
11495     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
11496   });
11497   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
11498   // First (resp. Second) is the first (resp. Second) potentially candidate
11499   // to be placed in a paired load.
11500   const LoadedSlice *First = nullptr;
11501   const LoadedSlice *Second = nullptr;
11502   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
11503                 // Set the beginning of the pair.
11504                                                            First = Second) {
11505 
11506     Second = &LoadedSlices[CurrSlice];
11507 
11508     // If First is NULL, it means we start a new pair.
11509     // Get to the next slice.
11510     if (!First)
11511       continue;
11512 
11513     EVT LoadedType = First->getLoadedType();
11514 
11515     // If the types of the slices are different, we cannot pair them.
11516     if (LoadedType != Second->getLoadedType())
11517       continue;
11518 
11519     // Check if the target supplies paired loads for this type.
11520     unsigned RequiredAlignment = 0;
11521     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
11522       // move to the next pair, this type is hopeless.
11523       Second = nullptr;
11524       continue;
11525     }
11526     // Check if we meet the alignment requirement.
11527     if (RequiredAlignment > First->getAlignment())
11528       continue;
11529 
11530     // Check that both loads are next to each other in memory.
11531     if (!areSlicesNextToEachOther(*First, *Second))
11532       continue;
11533 
11534     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
11535     --GlobalLSCost.Loads;
11536     // Move to the next pair.
11537     Second = nullptr;
11538   }
11539 }
11540 
11541 /// \brief Check the profitability of all involved LoadedSlice.
11542 /// Currently, it is considered profitable if there is exactly two
11543 /// involved slices (1) which are (2) next to each other in memory, and
11544 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11545 ///
11546 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11547 /// the elements themselves.
11548 ///
11549 /// FIXME: When the cost model will be mature enough, we can relax
11550 /// constraints (1) and (2).
11551 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11552                                 const APInt &UsedBits, bool ForCodeSize) {
11553   unsigned NumberOfSlices = LoadedSlices.size();
11554   if (StressLoadSlicing)
11555     return NumberOfSlices > 1;
11556 
11557   // Check (1).
11558   if (NumberOfSlices != 2)
11559     return false;
11560 
11561   // Check (2).
11562   if (!areUsedBitsDense(UsedBits))
11563     return false;
11564 
11565   // Check (3).
11566   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11567   // The original code has one big load.
11568   OrigCost.Loads = 1;
11569   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11570     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11571     // Accumulate the cost of all the slices.
11572     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11573     GlobalSlicingCost += SliceCost;
11574 
11575     // Account as cost in the original configuration the gain obtained
11576     // with the current slices.
11577     OrigCost.addSliceGain(LS);
11578   }
11579 
11580   // If the target supports paired load, adjust the cost accordingly.
11581   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
11582   return OrigCost > GlobalSlicingCost;
11583 }
11584 
11585 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
11586 /// operations, split it in the various pieces being extracted.
11587 ///
11588 /// This sort of thing is introduced by SROA.
11589 /// This slicing takes care not to insert overlapping loads.
11590 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
11591 bool DAGCombiner::SliceUpLoad(SDNode *N) {
11592   if (Level < AfterLegalizeDAG)
11593     return false;
11594 
11595   LoadSDNode *LD = cast<LoadSDNode>(N);
11596   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
11597       !LD->getValueType(0).isInteger())
11598     return false;
11599 
11600   // Keep track of already used bits to detect overlapping values.
11601   // In that case, we will just abort the transformation.
11602   APInt UsedBits(LD->getValueSizeInBits(0), 0);
11603 
11604   SmallVector<LoadedSlice, 4> LoadedSlices;
11605 
11606   // Check if this load is used as several smaller chunks of bits.
11607   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
11608   // of computation for each trunc.
11609   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
11610        UI != UIEnd; ++UI) {
11611     // Skip the uses of the chain.
11612     if (UI.getUse().getResNo() != 0)
11613       continue;
11614 
11615     SDNode *User = *UI;
11616     unsigned Shift = 0;
11617 
11618     // Check if this is a trunc(lshr).
11619     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
11620         isa<ConstantSDNode>(User->getOperand(1))) {
11621       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
11622       User = *User->use_begin();
11623     }
11624 
11625     // At this point, User is a Truncate, iff we encountered, trunc or
11626     // trunc(lshr).
11627     if (User->getOpcode() != ISD::TRUNCATE)
11628       return false;
11629 
11630     // The width of the type must be a power of 2 and greater than 8-bits.
11631     // Otherwise the load cannot be represented in LLVM IR.
11632     // Moreover, if we shifted with a non-8-bits multiple, the slice
11633     // will be across several bytes. We do not support that.
11634     unsigned Width = User->getValueSizeInBits(0);
11635     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
11636       return 0;
11637 
11638     // Build the slice for this chain of computations.
11639     LoadedSlice LS(User, LD, Shift, &DAG);
11640     APInt CurrentUsedBits = LS.getUsedBits();
11641 
11642     // Check if this slice overlaps with another.
11643     if ((CurrentUsedBits & UsedBits) != 0)
11644       return false;
11645     // Update the bits used globally.
11646     UsedBits |= CurrentUsedBits;
11647 
11648     // Check if the new slice would be legal.
11649     if (!LS.isLegal())
11650       return false;
11651 
11652     // Record the slice.
11653     LoadedSlices.push_back(LS);
11654   }
11655 
11656   // Abort slicing if it does not seem to be profitable.
11657   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
11658     return false;
11659 
11660   ++SlicedLoads;
11661 
11662   // Rewrite each chain to use an independent load.
11663   // By construction, each chain can be represented by a unique load.
11664 
11665   // Prepare the argument for the new token factor for all the slices.
11666   SmallVector<SDValue, 8> ArgChains;
11667   for (SmallVectorImpl<LoadedSlice>::const_iterator
11668            LSIt = LoadedSlices.begin(),
11669            LSItEnd = LoadedSlices.end();
11670        LSIt != LSItEnd; ++LSIt) {
11671     SDValue SliceInst = LSIt->loadSlice();
11672     CombineTo(LSIt->Inst, SliceInst, true);
11673     if (SliceInst.getOpcode() != ISD::LOAD)
11674       SliceInst = SliceInst.getOperand(0);
11675     assert(SliceInst->getOpcode() == ISD::LOAD &&
11676            "It takes more than a zext to get to the loaded slice!!");
11677     ArgChains.push_back(SliceInst.getValue(1));
11678   }
11679 
11680   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
11681                               ArgChains);
11682   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11683   AddToWorklist(Chain.getNode());
11684   return true;
11685 }
11686 
11687 /// Check to see if V is (and load (ptr), imm), where the load is having
11688 /// specific bytes cleared out.  If so, return the byte size being masked out
11689 /// and the shift amount.
11690 static std::pair<unsigned, unsigned>
11691 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
11692   std::pair<unsigned, unsigned> Result(0, 0);
11693 
11694   // Check for the structure we're looking for.
11695   if (V->getOpcode() != ISD::AND ||
11696       !isa<ConstantSDNode>(V->getOperand(1)) ||
11697       !ISD::isNormalLoad(V->getOperand(0).getNode()))
11698     return Result;
11699 
11700   // Check the chain and pointer.
11701   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
11702   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
11703 
11704   // The store should be chained directly to the load or be an operand of a
11705   // tokenfactor.
11706   if (LD == Chain.getNode())
11707     ; // ok.
11708   else if (Chain->getOpcode() != ISD::TokenFactor)
11709     return Result; // Fail.
11710   else {
11711     bool isOk = false;
11712     for (const SDValue &ChainOp : Chain->op_values())
11713       if (ChainOp.getNode() == LD) {
11714         isOk = true;
11715         break;
11716       }
11717     if (!isOk) return Result;
11718   }
11719 
11720   // This only handles simple types.
11721   if (V.getValueType() != MVT::i16 &&
11722       V.getValueType() != MVT::i32 &&
11723       V.getValueType() != MVT::i64)
11724     return Result;
11725 
11726   // Check the constant mask.  Invert it so that the bits being masked out are
11727   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
11728   // follow the sign bit for uniformity.
11729   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
11730   unsigned NotMaskLZ = countLeadingZeros(NotMask);
11731   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
11732   unsigned NotMaskTZ = countTrailingZeros(NotMask);
11733   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
11734   if (NotMaskLZ == 64) return Result;  // All zero mask.
11735 
11736   // See if we have a continuous run of bits.  If so, we have 0*1+0*
11737   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
11738     return Result;
11739 
11740   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
11741   if (V.getValueType() != MVT::i64 && NotMaskLZ)
11742     NotMaskLZ -= 64-V.getValueSizeInBits();
11743 
11744   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
11745   switch (MaskedBytes) {
11746   case 1:
11747   case 2:
11748   case 4: break;
11749   default: return Result; // All one mask, or 5-byte mask.
11750   }
11751 
11752   // Verify that the first bit starts at a multiple of mask so that the access
11753   // is aligned the same as the access width.
11754   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
11755 
11756   Result.first = MaskedBytes;
11757   Result.second = NotMaskTZ/8;
11758   return Result;
11759 }
11760 
11761 
11762 /// Check to see if IVal is something that provides a value as specified by
11763 /// MaskInfo. If so, replace the specified store with a narrower store of
11764 /// truncated IVal.
11765 static SDNode *
11766 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
11767                                 SDValue IVal, StoreSDNode *St,
11768                                 DAGCombiner *DC) {
11769   unsigned NumBytes = MaskInfo.first;
11770   unsigned ByteShift = MaskInfo.second;
11771   SelectionDAG &DAG = DC->getDAG();
11772 
11773   // Check to see if IVal is all zeros in the part being masked in by the 'or'
11774   // that uses this.  If not, this is not a replacement.
11775   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
11776                                   ByteShift*8, (ByteShift+NumBytes)*8);
11777   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
11778 
11779   // Check that it is legal on the target to do this.  It is legal if the new
11780   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
11781   // legalization.
11782   MVT VT = MVT::getIntegerVT(NumBytes*8);
11783   if (!DC->isTypeLegal(VT))
11784     return nullptr;
11785 
11786   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
11787   // shifted by ByteShift and truncated down to NumBytes.
11788   if (ByteShift) {
11789     SDLoc DL(IVal);
11790     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
11791                        DAG.getConstant(ByteShift*8, DL,
11792                                     DC->getShiftAmountTy(IVal.getValueType())));
11793   }
11794 
11795   // Figure out the offset for the store and the alignment of the access.
11796   unsigned StOffset;
11797   unsigned NewAlign = St->getAlignment();
11798 
11799   if (DAG.getDataLayout().isLittleEndian())
11800     StOffset = ByteShift;
11801   else
11802     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
11803 
11804   SDValue Ptr = St->getBasePtr();
11805   if (StOffset) {
11806     SDLoc DL(IVal);
11807     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
11808                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
11809     NewAlign = MinAlign(NewAlign, StOffset);
11810   }
11811 
11812   // Truncate down to the new size.
11813   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
11814 
11815   ++OpsNarrowed;
11816   return DAG
11817       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
11818                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
11819       .getNode();
11820 }
11821 
11822 
11823 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
11824 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
11825 /// narrowing the load and store if it would end up being a win for performance
11826 /// or code size.
11827 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
11828   StoreSDNode *ST  = cast<StoreSDNode>(N);
11829   if (ST->isVolatile())
11830     return SDValue();
11831 
11832   SDValue Chain = ST->getChain();
11833   SDValue Value = ST->getValue();
11834   SDValue Ptr   = ST->getBasePtr();
11835   EVT VT = Value.getValueType();
11836 
11837   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
11838     return SDValue();
11839 
11840   unsigned Opc = Value.getOpcode();
11841 
11842   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
11843   // is a byte mask indicating a consecutive number of bytes, check to see if
11844   // Y is known to provide just those bytes.  If so, we try to replace the
11845   // load + replace + store sequence with a single (narrower) store, which makes
11846   // the load dead.
11847   if (Opc == ISD::OR) {
11848     std::pair<unsigned, unsigned> MaskedLoad;
11849     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
11850     if (MaskedLoad.first)
11851       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11852                                                   Value.getOperand(1), ST,this))
11853         return SDValue(NewST, 0);
11854 
11855     // Or is commutative, so try swapping X and Y.
11856     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
11857     if (MaskedLoad.first)
11858       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11859                                                   Value.getOperand(0), ST,this))
11860         return SDValue(NewST, 0);
11861   }
11862 
11863   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
11864       Value.getOperand(1).getOpcode() != ISD::Constant)
11865     return SDValue();
11866 
11867   SDValue N0 = Value.getOperand(0);
11868   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11869       Chain == SDValue(N0.getNode(), 1)) {
11870     LoadSDNode *LD = cast<LoadSDNode>(N0);
11871     if (LD->getBasePtr() != Ptr ||
11872         LD->getPointerInfo().getAddrSpace() !=
11873         ST->getPointerInfo().getAddrSpace())
11874       return SDValue();
11875 
11876     // Find the type to narrow it the load / op / store to.
11877     SDValue N1 = Value.getOperand(1);
11878     unsigned BitWidth = N1.getValueSizeInBits();
11879     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
11880     if (Opc == ISD::AND)
11881       Imm ^= APInt::getAllOnesValue(BitWidth);
11882     if (Imm == 0 || Imm.isAllOnesValue())
11883       return SDValue();
11884     unsigned ShAmt = Imm.countTrailingZeros();
11885     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
11886     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
11887     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11888     // The narrowing should be profitable, the load/store operation should be
11889     // legal (or custom) and the store size should be equal to the NewVT width.
11890     while (NewBW < BitWidth &&
11891            (NewVT.getStoreSizeInBits() != NewBW ||
11892             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
11893             !TLI.isNarrowingProfitable(VT, NewVT))) {
11894       NewBW = NextPowerOf2(NewBW);
11895       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11896     }
11897     if (NewBW >= BitWidth)
11898       return SDValue();
11899 
11900     // If the lsb changed does not start at the type bitwidth boundary,
11901     // start at the previous one.
11902     if (ShAmt % NewBW)
11903       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
11904     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
11905                                    std::min(BitWidth, ShAmt + NewBW));
11906     if ((Imm & Mask) == Imm) {
11907       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
11908       if (Opc == ISD::AND)
11909         NewImm ^= APInt::getAllOnesValue(NewBW);
11910       uint64_t PtrOff = ShAmt / 8;
11911       // For big endian targets, we need to adjust the offset to the pointer to
11912       // load the correct bytes.
11913       if (DAG.getDataLayout().isBigEndian())
11914         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
11915 
11916       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
11917       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
11918       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
11919         return SDValue();
11920 
11921       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
11922                                    Ptr.getValueType(), Ptr,
11923                                    DAG.getConstant(PtrOff, SDLoc(LD),
11924                                                    Ptr.getValueType()));
11925       SDValue NewLD =
11926           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
11927                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11928                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
11929       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
11930                                    DAG.getConstant(NewImm, SDLoc(Value),
11931                                                    NewVT));
11932       SDValue NewST =
11933           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
11934                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
11935 
11936       AddToWorklist(NewPtr.getNode());
11937       AddToWorklist(NewLD.getNode());
11938       AddToWorklist(NewVal.getNode());
11939       WorklistRemover DeadNodes(*this);
11940       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
11941       ++OpsNarrowed;
11942       return NewST;
11943     }
11944   }
11945 
11946   return SDValue();
11947 }
11948 
11949 /// For a given floating point load / store pair, if the load value isn't used
11950 /// by any other operations, then consider transforming the pair to integer
11951 /// load / store operations if the target deems the transformation profitable.
11952 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
11953   StoreSDNode *ST  = cast<StoreSDNode>(N);
11954   SDValue Chain = ST->getChain();
11955   SDValue Value = ST->getValue();
11956   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
11957       Value.hasOneUse() &&
11958       Chain == SDValue(Value.getNode(), 1)) {
11959     LoadSDNode *LD = cast<LoadSDNode>(Value);
11960     EVT VT = LD->getMemoryVT();
11961     if (!VT.isFloatingPoint() ||
11962         VT != ST->getMemoryVT() ||
11963         LD->isNonTemporal() ||
11964         ST->isNonTemporal() ||
11965         LD->getPointerInfo().getAddrSpace() != 0 ||
11966         ST->getPointerInfo().getAddrSpace() != 0)
11967       return SDValue();
11968 
11969     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11970     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
11971         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
11972         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
11973         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
11974       return SDValue();
11975 
11976     unsigned LDAlign = LD->getAlignment();
11977     unsigned STAlign = ST->getAlignment();
11978     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
11979     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
11980     if (LDAlign < ABIAlign || STAlign < ABIAlign)
11981       return SDValue();
11982 
11983     SDValue NewLD =
11984         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
11985                     LD->getPointerInfo(), LDAlign);
11986 
11987     SDValue NewST =
11988         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
11989                      ST->getPointerInfo(), STAlign);
11990 
11991     AddToWorklist(NewLD.getNode());
11992     AddToWorklist(NewST.getNode());
11993     WorklistRemover DeadNodes(*this);
11994     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
11995     ++LdStFP2Int;
11996     return NewST;
11997   }
11998 
11999   return SDValue();
12000 }
12001 
12002 // This is a helper function for visitMUL to check the profitability
12003 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12004 // MulNode is the original multiply, AddNode is (add x, c1),
12005 // and ConstNode is c2.
12006 //
12007 // If the (add x, c1) has multiple uses, we could increase
12008 // the number of adds if we make this transformation.
12009 // It would only be worth doing this if we can remove a
12010 // multiply in the process. Check for that here.
12011 // To illustrate:
12012 //     (A + c1) * c3
12013 //     (A + c2) * c3
12014 // We're checking for cases where we have common "c3 * A" expressions.
12015 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12016                                               SDValue &AddNode,
12017                                               SDValue &ConstNode) {
12018   APInt Val;
12019 
12020   // If the add only has one use, this would be OK to do.
12021   if (AddNode.getNode()->hasOneUse())
12022     return true;
12023 
12024   // Walk all the users of the constant with which we're multiplying.
12025   for (SDNode *Use : ConstNode->uses()) {
12026 
12027     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12028       continue;
12029 
12030     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12031       SDNode *OtherOp;
12032       SDNode *MulVar = AddNode.getOperand(0).getNode();
12033 
12034       // OtherOp is what we're multiplying against the constant.
12035       if (Use->getOperand(0) == ConstNode)
12036         OtherOp = Use->getOperand(1).getNode();
12037       else
12038         OtherOp = Use->getOperand(0).getNode();
12039 
12040       // Check to see if multiply is with the same operand of our "add".
12041       //
12042       //     ConstNode  = CONST
12043       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12044       //     ...
12045       //     AddNode  = (A + c1)  <-- MulVar is A.
12046       //         = AddNode * ConstNode   <-- current visiting instruction.
12047       //
12048       // If we make this transformation, we will have a common
12049       // multiply (ConstNode * A) that we can save.
12050       if (OtherOp == MulVar)
12051         return true;
12052 
12053       // Now check to see if a future expansion will give us a common
12054       // multiply.
12055       //
12056       //     ConstNode  = CONST
12057       //     AddNode    = (A + c1)
12058       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12059       //     ...
12060       //     OtherOp = (A + c2)
12061       //     Use     = OtherOp * ConstNode <-- visiting Use.
12062       //
12063       // If we make this transformation, we will have a common
12064       // multiply (CONST * A) after we also do the same transformation
12065       // to the "t2" instruction.
12066       if (OtherOp->getOpcode() == ISD::ADD &&
12067           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12068           OtherOp->getOperand(0).getNode() == MulVar)
12069         return true;
12070     }
12071   }
12072 
12073   // Didn't find a case where this would be profitable.
12074   return false;
12075 }
12076 
12077 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12078                   SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
12079                   unsigned NumStores, bool IsConstantSrc, bool UseVector) {
12080   // Make sure we have something to merge.
12081   if (NumStores < 2)
12082     return false;
12083 
12084   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12085 
12086   // The latest Node in the DAG.
12087   SDLoc DL(StoreNodes[0].MemNode);
12088 
12089   SDValue StoredVal;
12090   if (UseVector) {
12091     bool IsVec = MemVT.isVector();
12092     unsigned Elts = NumStores;
12093     if (IsVec) {
12094       // When merging vector stores, get the total number of elements.
12095       Elts *= MemVT.getVectorNumElements();
12096     }
12097     // Get the type for the merged vector store.
12098     EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12099     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
12100 
12101     if (IsConstantSrc) {
12102       SmallVector<SDValue, 8> BuildVector;
12103       for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
12104         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12105         SDValue Val = St->getValue();
12106         if (MemVT.getScalarType().isInteger())
12107           if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
12108             Val = DAG.getConstant(
12109                 (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
12110                 SDLoc(CFP), MemVT);
12111         BuildVector.push_back(Val);
12112       }
12113       StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
12114     } else {
12115       SmallVector<SDValue, 8> Ops;
12116       for (unsigned i = 0; i < NumStores; ++i) {
12117         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12118         SDValue Val = St->getValue();
12119         // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
12120         if (Val.getValueType() != MemVT)
12121           return false;
12122         Ops.push_back(Val);
12123       }
12124 
12125       // Build the extracted vector elements back into a vector.
12126       StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
12127                               DL, Ty, Ops);    }
12128   } else {
12129     // We should always use a vector store when merging extracted vector
12130     // elements, so this path implies a store of constants.
12131     assert(IsConstantSrc && "Merged vector elements should use vector store");
12132 
12133     unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
12134     APInt StoreInt(SizeInBits, 0);
12135 
12136     // Construct a single integer constant which is made of the smaller
12137     // constant inputs.
12138     bool IsLE = DAG.getDataLayout().isLittleEndian();
12139     for (unsigned i = 0; i < NumStores; ++i) {
12140       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12141       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12142 
12143       SDValue Val = St->getValue();
12144       StoreInt <<= ElementSizeBytes * 8;
12145       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12146         StoreInt |= C->getAPIntValue().zext(SizeInBits);
12147       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12148         StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
12149       } else {
12150         llvm_unreachable("Invalid constant element type");
12151       }
12152     }
12153 
12154     // Create the new Load and Store operations.
12155     EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12156     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12157   }
12158 
12159   SmallVector<SDValue, 8> Chains;
12160 
12161   // Gather all Chains we're inheriting. As generally all chains are
12162   // equal, do minor check to remove obvious redundancies.
12163   Chains.push_back(StoreNodes[0].MemNode->getChain());
12164   for (unsigned i = 1; i < NumStores; ++i)
12165     if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain())
12166       Chains.push_back(StoreNodes[i].MemNode->getChain());
12167 
12168   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12169   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12170   SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
12171                                   FirstInChain->getBasePtr(),
12172                                   FirstInChain->getPointerInfo(),
12173                                   FirstInChain->getAlignment());
12174 
12175   // Replace all merged stores with the new store.
12176   for (unsigned i = 0; i < NumStores; ++i)
12177     CombineTo(StoreNodes[i].MemNode, NewStore);
12178 
12179   AddToWorklist(NewChain.getNode());
12180   return true;
12181 }
12182 
12183 void DAGCombiner::getStoreMergeCandidates(
12184     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12185   // This holds the base pointer, index, and the offset in bytes from the base
12186   // pointer.
12187   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
12188   EVT MemVT = St->getMemoryVT();
12189 
12190   // We must have a base and an offset.
12191   if (!BasePtr.Base.getNode())
12192     return;
12193 
12194   // Do not handle stores to undef base pointers.
12195   if (BasePtr.Base.isUndef())
12196     return;
12197 
12198   // We looking for a root node which is an ancestor to all mergable
12199   // stores. We search up through a load, to our root and then down
12200   // through all children. For instance we will find Store{1,2,3} if
12201   // St is Store1, Store2. or Store3 where the root is not a load
12202   // which always true for nonvolatile ops. TODO: Expand
12203   // the search to find all valid candidates through multiple layers of loads.
12204   //
12205   // Root
12206   // |-------|-------|
12207   // Load    Load    Store3
12208   // |       |
12209   // Store1   Store2
12210   //
12211   // FIXME: We should be able to climb and
12212   // descend TokenFactors to find candidates as well.
12213 
12214   SDNode *RootNode = (St->getChain()).getNode();
12215 
12216   // Set of Parents of Candidates
12217   std::set<SDNode *> CandidateParents;
12218 
12219   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
12220     RootNode = Ldn->getChain().getNode();
12221     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12222       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
12223         CandidateParents.insert(*I);
12224   } else
12225     CandidateParents.insert(RootNode);
12226 
12227   bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
12228   bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
12229                        isa<ConstantFPSDNode>(St->getValue());
12230   bool IsExtractVecSrc =
12231       (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12232        St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12233   auto CorrectValueKind = [&](StoreSDNode *Other) -> bool {
12234     if (IsLoadSrc)
12235       return isa<LoadSDNode>(Other->getValue());
12236     if (IsConstantSrc)
12237       return (isa<ConstantSDNode>(Other->getValue()) ||
12238               isa<ConstantFPSDNode>(Other->getValue()));
12239     if (IsExtractVecSrc)
12240       return (Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12241               Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12242     return false;
12243   };
12244 
12245   // check all parents of mergable children
12246   for (auto P = CandidateParents.begin(); P != CandidateParents.end(); ++P)
12247     for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E; ++I)
12248       if (I.getOperandNo() == 0)
12249         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
12250           if (OtherST->isVolatile() || OtherST->isIndexed())
12251             continue;
12252           // We can merge constant floats to equivalent integers
12253           if (OtherST->getMemoryVT() != MemVT)
12254             if (!(MemVT.isInteger() && MemVT.bitsEq(OtherST->getMemoryVT()) &&
12255                   isa<ConstantFPSDNode>(OtherST->getValue())))
12256               continue;
12257           BaseIndexOffset Ptr =
12258               BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
12259           if (Ptr.equalBaseIndex(BasePtr) && CorrectValueKind(OtherST))
12260             StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
12261         }
12262 }
12263 
12264 // We need to check that merging these stores does not cause a loop
12265 // in the DAG. Any store candidate may depend on another candidate
12266 // indirectly through its operand (we already consider dependencies
12267 // through the chain). Check in parallel by searching up from
12268 // non-chain operands of candidates.
12269 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12270     SmallVectorImpl<MemOpLink> &StoreNodes) {
12271   SmallPtrSet<const SDNode *, 16> Visited;
12272   SmallVector<const SDNode *, 8> Worklist;
12273   // search ops of store candidates
12274   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
12275     SDNode *n = StoreNodes[i].MemNode;
12276     // Potential loops may happen only through non-chain operands
12277     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12278       Worklist.push_back(n->getOperand(j).getNode());
12279   }
12280   // search through DAG. We can stop early if we find a storenode
12281   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
12282     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
12283       return false;
12284   }
12285   return true;
12286 }
12287 
12288 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12289   if (OptLevel == CodeGenOpt::None)
12290     return false;
12291 
12292   EVT MemVT = St->getMemoryVT();
12293   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12294 
12295   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12296     return false;
12297 
12298   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12299       Attribute::NoImplicitFloat);
12300 
12301   // This function cannot currently deal with non-byte-sized memory sizes.
12302   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12303     return false;
12304 
12305   if (!MemVT.isSimple())
12306     return false;
12307 
12308   // Perform an early exit check. Do not bother looking at stored values that
12309   // are not constants, loads, or extracted vector elements.
12310   SDValue StoredVal = St->getValue();
12311   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12312   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12313                        isa<ConstantFPSDNode>(StoredVal);
12314   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12315                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12316 
12317   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12318     return false;
12319 
12320   // Don't merge vectors into wider vectors if the source data comes from loads.
12321   // TODO: This restriction can be lifted by using logic similar to the
12322   // ExtractVecSrc case.
12323   if (MemVT.isVector() && IsLoadSrc)
12324     return false;
12325 
12326   SmallVector<MemOpLink, 8> StoreNodes;
12327   // Find potential store merge candidates by searching through chain sub-DAG
12328   getStoreMergeCandidates(St, StoreNodes);
12329 
12330   // Check if there is anything to merge.
12331   if (StoreNodes.size() < 2)
12332     return false;
12333 
12334   // Check that we can merge these candidates without causing a cycle
12335   if (!checkMergeStoreCandidatesForDependencies(StoreNodes))
12336     return false;
12337 
12338   // Sort the memory operands according to their distance from the
12339   // base pointer.
12340   std::sort(StoreNodes.begin(), StoreNodes.end(),
12341             [](MemOpLink LHS, MemOpLink RHS) {
12342               return LHS.OffsetFromBase < RHS.OffsetFromBase;
12343             });
12344 
12345   // Scan the memory operations on the chain and find the first non-consecutive
12346   // store memory address.
12347   unsigned NumConsecutiveStores = 0;
12348   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12349 
12350   // Check that the addresses are consecutive starting from the second
12351   // element in the list of stores.
12352   for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12353     int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12354     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12355       break;
12356     NumConsecutiveStores = i + 1;
12357   }
12358 
12359   if (NumConsecutiveStores < 2)
12360     return false;
12361 
12362   // The node with the lowest store address.
12363   LLVMContext &Context = *DAG.getContext();
12364   const DataLayout &DL = DAG.getDataLayout();
12365 
12366   // Store the constants into memory as one consecutive store.
12367   if (IsConstantSrc) {
12368     bool RV = false;
12369     while (NumConsecutiveStores > 1) {
12370       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12371       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12372       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12373       unsigned LastLegalType = 0;
12374       unsigned LastLegalVectorType = 0;
12375       bool NonZero = false;
12376       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12377         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12378         SDValue StoredVal = ST->getValue();
12379 
12380         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
12381           NonZero |= !C->isNullValue();
12382         } else if (ConstantFPSDNode *C =
12383                        dyn_cast<ConstantFPSDNode>(StoredVal)) {
12384           NonZero |= !C->getConstantFPValue()->isNullValue();
12385         } else {
12386           // Non-constant.
12387           break;
12388         }
12389 
12390         // Find a legal type for the constant store.
12391         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12392         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12393         bool IsFast = false;
12394         if (TLI.isTypeLegal(StoreTy) &&
12395             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12396                                    FirstStoreAlign, &IsFast) &&
12397             IsFast) {
12398           LastLegalType = i + 1;
12399           // Or check whether a truncstore is legal.
12400         } else if (TLI.getTypeAction(Context, StoreTy) ==
12401                    TargetLowering::TypePromoteInteger) {
12402           EVT LegalizedStoredValueTy =
12403               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12404           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12405               TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12406                                      FirstStoreAS, FirstStoreAlign, &IsFast) &&
12407               IsFast) {
12408             LastLegalType = i + 1;
12409           }
12410         }
12411 
12412         // We only use vectors if the constant is known to be zero or the target
12413         // allows it and the function is not marked with the noimplicitfloat
12414         // attribute.
12415         if ((!NonZero ||
12416              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12417             !NoVectors) {
12418           // Find a legal type for the vector store.
12419           EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
12420           if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
12421               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12422                                      FirstStoreAlign, &IsFast) &&
12423               IsFast)
12424             LastLegalVectorType = i + 1;
12425         }
12426       }
12427 
12428       // Check if we found a legal integer type that creates a meaningful merge.
12429       if (LastLegalType < 2 && LastLegalVectorType < 2)
12430         break;
12431 
12432       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12433       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12434 
12435       bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
12436                                                     true, UseVector);
12437       if (!Merged)
12438         break;
12439       // Remove merged stores for next iteration.
12440       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12441       RV = true;
12442       NumConsecutiveStores -= NumElem;
12443     }
12444     return RV;
12445   }
12446 
12447   // When extracting multiple vector elements, try to store them
12448   // in one vector store rather than a sequence of scalar stores.
12449   if (IsExtractVecSrc) {
12450     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12451     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12452     unsigned FirstStoreAlign = FirstInChain->getAlignment();
12453     unsigned NumStoresToMerge = 0;
12454     bool IsVec = MemVT.isVector();
12455     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12456       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
12457       unsigned StoreValOpcode = St->getValue().getOpcode();
12458       // This restriction could be loosened.
12459       // Bail out if any stored values are not elements extracted from a vector.
12460       // It should be possible to handle mixed sources, but load sources need
12461       // more careful handling (see the block of code below that handles
12462       // consecutive loads).
12463       if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
12464           StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
12465         return false;
12466 
12467       // Find a legal type for the vector store.
12468       unsigned Elts = i + 1;
12469       if (IsVec) {
12470         // When merging vector stores, get the total number of elements.
12471         Elts *= MemVT.getVectorNumElements();
12472       }
12473       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12474       bool IsFast;
12475       if (TLI.isTypeLegal(Ty) &&
12476           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12477                                  FirstStoreAlign, &IsFast) && IsFast)
12478         NumStoresToMerge = i + 1;
12479     }
12480 
12481     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
12482                                            false, true);
12483   }
12484 
12485   // Below we handle the case of multiple consecutive stores that
12486   // come from multiple consecutive loads. We merge them into a single
12487   // wide load and a single wide store.
12488 
12489   // Look for load nodes which are used by the stored values.
12490   SmallVector<MemOpLink, 8> LoadNodes;
12491 
12492   // Find acceptable loads. Loads need to have the same chain (token factor),
12493   // must not be zext, volatile, indexed, and they must be consecutive.
12494   BaseIndexOffset LdBasePtr;
12495   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12496     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
12497     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
12498     if (!Ld) break;
12499 
12500     // Loads must only have one use.
12501     if (!Ld->hasNUsesOfValue(1, 0))
12502       break;
12503 
12504     // The memory operands must not be volatile.
12505     if (Ld->isVolatile() || Ld->isIndexed())
12506       break;
12507 
12508     // We do not accept ext loads.
12509     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
12510       break;
12511 
12512     // The stored memory type must be the same.
12513     if (Ld->getMemoryVT() != MemVT)
12514       break;
12515 
12516     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
12517     // If this is not the first ptr that we check.
12518     if (LdBasePtr.Base.getNode()) {
12519       // The base ptr must be the same.
12520       if (!LdPtr.equalBaseIndex(LdBasePtr))
12521         break;
12522     } else {
12523       // Check that all other base pointers are the same as this one.
12524       LdBasePtr = LdPtr;
12525     }
12526 
12527     // We found a potential memory operand to merge.
12528     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
12529   }
12530 
12531   if (LoadNodes.size() < 2)
12532     return false;
12533 
12534   // If we have load/store pair instructions and we only have two values,
12535   // don't bother.
12536   unsigned RequiredAlignment;
12537   if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
12538       St->getAlignment() >= RequiredAlignment)
12539     return false;
12540   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12541   unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12542   unsigned FirstStoreAlign = FirstInChain->getAlignment();
12543   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
12544   unsigned FirstLoadAS = FirstLoad->getAddressSpace();
12545   unsigned FirstLoadAlign = FirstLoad->getAlignment();
12546 
12547   // Scan the memory operations on the chain and find the first non-consecutive
12548   // load memory address. These variables hold the index in the store node
12549   // array.
12550   unsigned LastConsecutiveLoad = 0;
12551   // This variable refers to the size and not index in the array.
12552   unsigned LastLegalVectorType = 0;
12553   unsigned LastLegalIntegerType = 0;
12554   StartAddress = LoadNodes[0].OffsetFromBase;
12555   SDValue FirstChain = FirstLoad->getChain();
12556   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
12557     // All loads must share the same chain.
12558     if (LoadNodes[i].MemNode->getChain() != FirstChain)
12559       break;
12560 
12561     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
12562     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12563       break;
12564     LastConsecutiveLoad = i;
12565     // Find a legal type for the vector store.
12566     EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
12567     bool IsFastSt, IsFastLd;
12568     if (TLI.isTypeLegal(StoreTy) &&
12569         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12570                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
12571         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12572                                FirstLoadAlign, &IsFastLd) && IsFastLd) {
12573       LastLegalVectorType = i + 1;
12574     }
12575 
12576     // Find a legal type for the integer store.
12577     unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
12578     StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12579     if (TLI.isTypeLegal(StoreTy) &&
12580         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12581                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
12582         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12583                                FirstLoadAlign, &IsFastLd) && IsFastLd)
12584       LastLegalIntegerType = i + 1;
12585     // Or check whether a truncstore and extload is legal.
12586     else if (TLI.getTypeAction(Context, StoreTy) ==
12587              TargetLowering::TypePromoteInteger) {
12588       EVT LegalizedStoredValueTy =
12589         TLI.getTypeToTransformTo(Context, StoreTy);
12590       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12591           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12592           TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12593           TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12594           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12595                                  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
12596           IsFastSt &&
12597           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12598                                  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
12599           IsFastLd)
12600         LastLegalIntegerType = i+1;
12601     }
12602   }
12603 
12604   // Only use vector types if the vector type is larger than the integer type.
12605   // If they are the same, use integers.
12606   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
12607   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
12608 
12609   // We add +1 here because the LastXXX variables refer to location while
12610   // the NumElem refers to array/index size.
12611   unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
12612   NumElem = std::min(LastLegalType, NumElem);
12613 
12614   if (NumElem < 2)
12615     return false;
12616 
12617   // Collect the chains from all merged stores. Because the common case
12618   // all chains are the same, check if we match the first Chain.
12619   SmallVector<SDValue, 8> MergeStoreChains;
12620   MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
12621   for (unsigned i = 1; i < NumElem; ++i)
12622     if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain())
12623       MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
12624 
12625   // Find if it is better to use vectors or integers to load and store
12626   // to memory.
12627   EVT JointMemOpVT;
12628   if (UseVectorTy) {
12629     JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
12630   } else {
12631     unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
12632     JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
12633   }
12634 
12635   SDLoc LoadDL(LoadNodes[0].MemNode);
12636   SDLoc StoreDL(StoreNodes[0].MemNode);
12637 
12638   // The merged loads are required to have the same incoming chain, so
12639   // using the first's chain is acceptable.
12640   SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
12641                                 FirstLoad->getBasePtr(),
12642                                 FirstLoad->getPointerInfo(), FirstLoadAlign);
12643 
12644   SDValue NewStoreChain =
12645     DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
12646 
12647   AddToWorklist(NewStoreChain.getNode());
12648 
12649   SDValue NewStore =
12650       DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
12651                    FirstInChain->getPointerInfo(), FirstStoreAlign);
12652 
12653   // Transfer chain users from old loads to the new load.
12654   for (unsigned i = 0; i < NumElem; ++i) {
12655     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
12656     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
12657                                   SDValue(NewLoad.getNode(), 1));
12658   }
12659 
12660   // Replace the all stores with the new store.
12661   for (unsigned i = 0; i < NumElem; ++i)
12662     CombineTo(StoreNodes[i].MemNode, NewStore);
12663   return true;
12664 }
12665 
12666 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
12667   SDLoc SL(ST);
12668   SDValue ReplStore;
12669 
12670   // Replace the chain to avoid dependency.
12671   if (ST->isTruncatingStore()) {
12672     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
12673                                   ST->getBasePtr(), ST->getMemoryVT(),
12674                                   ST->getMemOperand());
12675   } else {
12676     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
12677                              ST->getMemOperand());
12678   }
12679 
12680   // Create token to keep both nodes around.
12681   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
12682                               MVT::Other, ST->getChain(), ReplStore);
12683 
12684   // Make sure the new and old chains are cleaned up.
12685   AddToWorklist(Token.getNode());
12686 
12687   // Don't add users to work list.
12688   return CombineTo(ST, Token, false);
12689 }
12690 
12691 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
12692   SDValue Value = ST->getValue();
12693   if (Value.getOpcode() == ISD::TargetConstantFP)
12694     return SDValue();
12695 
12696   SDLoc DL(ST);
12697 
12698   SDValue Chain = ST->getChain();
12699   SDValue Ptr = ST->getBasePtr();
12700 
12701   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
12702 
12703   // NOTE: If the original store is volatile, this transform must not increase
12704   // the number of stores.  For example, on x86-32 an f64 can be stored in one
12705   // processor operation but an i64 (which is not legal) requires two.  So the
12706   // transform should not be done in this case.
12707 
12708   SDValue Tmp;
12709   switch (CFP->getSimpleValueType(0).SimpleTy) {
12710   default:
12711     llvm_unreachable("Unknown FP type");
12712   case MVT::f16:    // We don't do this for these yet.
12713   case MVT::f80:
12714   case MVT::f128:
12715   case MVT::ppcf128:
12716     return SDValue();
12717   case MVT::f32:
12718     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
12719         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12720       ;
12721       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
12722                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
12723                             MVT::i32);
12724       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
12725     }
12726 
12727     return SDValue();
12728   case MVT::f64:
12729     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
12730          !ST->isVolatile()) ||
12731         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
12732       ;
12733       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
12734                             getZExtValue(), SDLoc(CFP), MVT::i64);
12735       return DAG.getStore(Chain, DL, Tmp,
12736                           Ptr, ST->getMemOperand());
12737     }
12738 
12739     if (!ST->isVolatile() &&
12740         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12741       // Many FP stores are not made apparent until after legalize, e.g. for
12742       // argument passing.  Since this is so common, custom legalize the
12743       // 64-bit integer store into two 32-bit stores.
12744       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
12745       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
12746       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
12747       if (DAG.getDataLayout().isBigEndian())
12748         std::swap(Lo, Hi);
12749 
12750       unsigned Alignment = ST->getAlignment();
12751       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12752       AAMDNodes AAInfo = ST->getAAInfo();
12753 
12754       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12755                                  ST->getAlignment(), MMOFlags, AAInfo);
12756       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12757                         DAG.getConstant(4, DL, Ptr.getValueType()));
12758       Alignment = MinAlign(Alignment, 4U);
12759       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
12760                                  ST->getPointerInfo().getWithOffset(4),
12761                                  Alignment, MMOFlags, AAInfo);
12762       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12763                          St0, St1);
12764     }
12765 
12766     return SDValue();
12767   }
12768 }
12769 
12770 SDValue DAGCombiner::visitSTORE(SDNode *N) {
12771   StoreSDNode *ST  = cast<StoreSDNode>(N);
12772   SDValue Chain = ST->getChain();
12773   SDValue Value = ST->getValue();
12774   SDValue Ptr   = ST->getBasePtr();
12775 
12776   // If this is a store of a bit convert, store the input value if the
12777   // resultant store does not need a higher alignment than the original.
12778   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
12779       ST->isUnindexed()) {
12780     EVT SVT = Value.getOperand(0).getValueType();
12781     if (((!LegalOperations && !ST->isVolatile()) ||
12782          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
12783         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
12784       unsigned OrigAlign = ST->getAlignment();
12785       bool Fast = false;
12786       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
12787                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
12788           Fast) {
12789         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12790                             ST->getPointerInfo(), OrigAlign,
12791                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
12792       }
12793     }
12794   }
12795 
12796   // Turn 'store undef, Ptr' -> nothing.
12797   if (Value.isUndef() && ST->isUnindexed())
12798     return Chain;
12799 
12800   // Try to infer better alignment information than the store already has.
12801   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
12802     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12803       if (Align > ST->getAlignment()) {
12804         SDValue NewStore =
12805             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
12806                               ST->getMemoryVT(), Align,
12807                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
12808         if (NewStore.getNode() != N)
12809           return CombineTo(ST, NewStore, true);
12810       }
12811     }
12812   }
12813 
12814   // Try transforming a pair floating point load / store ops to integer
12815   // load / store ops.
12816   if (SDValue NewST = TransformFPLoadStorePair(N))
12817     return NewST;
12818 
12819   if (ST->isUnindexed()) {
12820     // Walk up chain skipping non-aliasing memory nodes, on this store and any
12821     // adjacent stores.
12822     if (findBetterNeighborChains(ST)) {
12823       // replaceStoreChain uses CombineTo, which handled all of the worklist
12824       // manipulation. Return the original node to not do anything else.
12825       return SDValue(ST, 0);
12826     }
12827     Chain = ST->getChain();
12828   }
12829 
12830   // Try transforming N to an indexed store.
12831   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12832     return SDValue(N, 0);
12833 
12834   // FIXME: is there such a thing as a truncating indexed store?
12835   if (ST->isTruncatingStore() && ST->isUnindexed() &&
12836       Value.getValueType().isInteger()) {
12837     // See if we can simplify the input to this truncstore with knowledge that
12838     // only the low bits are being used.  For example:
12839     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
12840     SDValue Shorter = GetDemandedBits(
12841         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12842                                     ST->getMemoryVT().getScalarSizeInBits()));
12843     AddToWorklist(Value.getNode());
12844     if (Shorter.getNode())
12845       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
12846                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
12847 
12848     // Otherwise, see if we can simplify the operation with
12849     // SimplifyDemandedBits, which only works if the value has a single use.
12850     if (SimplifyDemandedBits(
12851             Value,
12852             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12853                                  ST->getMemoryVT().getScalarSizeInBits()))) {
12854       // Re-visit the store if anything changed and the store hasn't been merged
12855       // with another node (N is deleted) SimplifyDemandedBits will add Value's
12856       // node back to the worklist if necessary, but we also need to re-visit
12857       // the Store node itself.
12858       if (N->getOpcode() != ISD::DELETED_NODE)
12859         AddToWorklist(N);
12860       return SDValue(N, 0);
12861     }
12862   }
12863 
12864   // If this is a load followed by a store to the same location, then the store
12865   // is dead/noop.
12866   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
12867     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
12868         ST->isUnindexed() && !ST->isVolatile() &&
12869         // There can't be any side effects between the load and store, such as
12870         // a call or store.
12871         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
12872       // The store is dead, remove it.
12873       return Chain;
12874     }
12875   }
12876 
12877   // If this is a store followed by a store with the same value to the same
12878   // location, then the store is dead/noop.
12879   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
12880     if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
12881         ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
12882         ST1->isUnindexed() && !ST1->isVolatile()) {
12883       // The store is dead, remove it.
12884       return Chain;
12885     }
12886   }
12887 
12888   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
12889   // truncating store.  We can do this even if this is already a truncstore.
12890   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
12891       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
12892       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
12893                             ST->getMemoryVT())) {
12894     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
12895                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
12896   }
12897 
12898   // Only perform this optimization before the types are legal, because we
12899   // don't want to perform this optimization on every DAGCombine invocation.
12900   if (!LegalTypes) {
12901     for (;;) {
12902       // There can be multiple store sequences on the same chain.
12903       // Keep trying to merge store sequences until we are unable to do so
12904       // or until we merge the last store on the chain.
12905       bool Changed = MergeConsecutiveStores(ST);
12906       if (!Changed) break;
12907       // Return N as merge only uses CombineTo and no worklist clean
12908       // up is necessary.
12909       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
12910         return SDValue(N, 0);
12911     }
12912   }
12913 
12914   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
12915   //
12916   // Make sure to do this only after attempting to merge stores in order to
12917   //  avoid changing the types of some subset of stores due to visit order,
12918   //  preventing their merging.
12919   if (isa<ConstantFPSDNode>(ST->getValue())) {
12920     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
12921       return NewSt;
12922   }
12923 
12924   if (SDValue NewSt = splitMergedValStore(ST))
12925     return NewSt;
12926 
12927   return ReduceLoadOpStoreWidth(N);
12928 }
12929 
12930 /// For the instruction sequence of store below, F and I values
12931 /// are bundled together as an i64 value before being stored into memory.
12932 /// Sometimes it is more efficent to generate separate stores for F and I,
12933 /// which can remove the bitwise instructions or sink them to colder places.
12934 ///
12935 ///   (store (or (zext (bitcast F to i32) to i64),
12936 ///              (shl (zext I to i64), 32)), addr)  -->
12937 ///   (store F, addr) and (store I, addr+4)
12938 ///
12939 /// Similarly, splitting for other merged store can also be beneficial, like:
12940 /// For pair of {i32, i32}, i64 store --> two i32 stores.
12941 /// For pair of {i32, i16}, i64 store --> two i32 stores.
12942 /// For pair of {i16, i16}, i32 store --> two i16 stores.
12943 /// For pair of {i16, i8},  i32 store --> two i16 stores.
12944 /// For pair of {i8, i8},   i16 store --> two i8 stores.
12945 ///
12946 /// We allow each target to determine specifically which kind of splitting is
12947 /// supported.
12948 ///
12949 /// The store patterns are commonly seen from the simple code snippet below
12950 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
12951 ///   void goo(const std::pair<int, float> &);
12952 ///   hoo() {
12953 ///     ...
12954 ///     goo(std::make_pair(tmp, ftmp));
12955 ///     ...
12956 ///   }
12957 ///
12958 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
12959   if (OptLevel == CodeGenOpt::None)
12960     return SDValue();
12961 
12962   SDValue Val = ST->getValue();
12963   SDLoc DL(ST);
12964 
12965   // Match OR operand.
12966   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
12967     return SDValue();
12968 
12969   // Match SHL operand and get Lower and Higher parts of Val.
12970   SDValue Op1 = Val.getOperand(0);
12971   SDValue Op2 = Val.getOperand(1);
12972   SDValue Lo, Hi;
12973   if (Op1.getOpcode() != ISD::SHL) {
12974     std::swap(Op1, Op2);
12975     if (Op1.getOpcode() != ISD::SHL)
12976       return SDValue();
12977   }
12978   Lo = Op2;
12979   Hi = Op1.getOperand(0);
12980   if (!Op1.hasOneUse())
12981     return SDValue();
12982 
12983   // Match shift amount to HalfValBitSize.
12984   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
12985   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
12986   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
12987     return SDValue();
12988 
12989   // Lo and Hi are zero-extended from int with size less equal than 32
12990   // to i64.
12991   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
12992       !Lo.getOperand(0).getValueType().isScalarInteger() ||
12993       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
12994       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
12995       !Hi.getOperand(0).getValueType().isScalarInteger() ||
12996       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
12997     return SDValue();
12998 
12999   // Use the EVT of low and high parts before bitcast as the input
13000   // of target query.
13001   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13002                   ? Lo.getOperand(0).getValueType()
13003                   : Lo.getValueType();
13004   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13005                    ? Hi.getOperand(0).getValueType()
13006                    : Hi.getValueType();
13007   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13008     return SDValue();
13009 
13010   // Start to split store.
13011   unsigned Alignment = ST->getAlignment();
13012   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13013   AAMDNodes AAInfo = ST->getAAInfo();
13014 
13015   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13016   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13017   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13018   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13019 
13020   SDValue Chain = ST->getChain();
13021   SDValue Ptr = ST->getBasePtr();
13022   // Lower value store.
13023   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13024                              ST->getAlignment(), MMOFlags, AAInfo);
13025   Ptr =
13026       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13027                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13028   // Higher value store.
13029   SDValue St1 =
13030       DAG.getStore(St0, DL, Hi, Ptr,
13031                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13032                    Alignment / 2, MMOFlags, AAInfo);
13033   return St1;
13034 }
13035 
13036 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13037   SDValue InVec = N->getOperand(0);
13038   SDValue InVal = N->getOperand(1);
13039   SDValue EltNo = N->getOperand(2);
13040   SDLoc DL(N);
13041 
13042   // If the inserted element is an UNDEF, just use the input vector.
13043   if (InVal.isUndef())
13044     return InVec;
13045 
13046   EVT VT = InVec.getValueType();
13047 
13048   // Check that we know which element is being inserted
13049   if (!isa<ConstantSDNode>(EltNo))
13050     return SDValue();
13051   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13052 
13053   // Canonicalize insert_vector_elt dag nodes.
13054   // Example:
13055   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13056   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13057   //
13058   // Do this only if the child insert_vector node has one use; also
13059   // do this only if indices are both constants and Idx1 < Idx0.
13060   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13061       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13062     unsigned OtherElt =
13063       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
13064     if (Elt < OtherElt) {
13065       // Swap nodes.
13066       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13067                                   InVec.getOperand(0), InVal, EltNo);
13068       AddToWorklist(NewOp.getNode());
13069       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13070                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13071     }
13072   }
13073 
13074   // If we can't generate a legal BUILD_VECTOR, exit
13075   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13076     return SDValue();
13077 
13078   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13079   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13080   // vector elements.
13081   SmallVector<SDValue, 8> Ops;
13082   // Do not combine these two vectors if the output vector will not replace
13083   // the input vector.
13084   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13085     Ops.append(InVec.getNode()->op_begin(),
13086                InVec.getNode()->op_end());
13087   } else if (InVec.isUndef()) {
13088     unsigned NElts = VT.getVectorNumElements();
13089     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13090   } else {
13091     return SDValue();
13092   }
13093 
13094   // Insert the element
13095   if (Elt < Ops.size()) {
13096     // All the operands of BUILD_VECTOR must have the same type;
13097     // we enforce that here.
13098     EVT OpVT = Ops[0].getValueType();
13099     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13100   }
13101 
13102   // Return the new vector
13103   return DAG.getBuildVector(VT, DL, Ops);
13104 }
13105 
13106 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
13107     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
13108   assert(!OriginalLoad->isVolatile());
13109 
13110   EVT ResultVT = EVE->getValueType(0);
13111   EVT VecEltVT = InVecVT.getVectorElementType();
13112   unsigned Align = OriginalLoad->getAlignment();
13113   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
13114       VecEltVT.getTypeForEVT(*DAG.getContext()));
13115 
13116   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13117     return SDValue();
13118 
13119   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
13120     ISD::NON_EXTLOAD : ISD::EXTLOAD;
13121   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
13122     return SDValue();
13123 
13124   Align = NewAlign;
13125 
13126   SDValue NewPtr = OriginalLoad->getBasePtr();
13127   SDValue Offset;
13128   EVT PtrType = NewPtr.getValueType();
13129   MachinePointerInfo MPI;
13130   SDLoc DL(EVE);
13131   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13132     int Elt = ConstEltNo->getZExtValue();
13133     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
13134     Offset = DAG.getConstant(PtrOff, DL, PtrType);
13135     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
13136   } else {
13137     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
13138     Offset = DAG.getNode(
13139         ISD::MUL, DL, PtrType, Offset,
13140         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
13141     MPI = OriginalLoad->getPointerInfo();
13142   }
13143   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
13144 
13145   // The replacement we need to do here is a little tricky: we need to
13146   // replace an extractelement of a load with a load.
13147   // Use ReplaceAllUsesOfValuesWith to do the replacement.
13148   // Note that this replacement assumes that the extractvalue is the only
13149   // use of the load; that's okay because we don't want to perform this
13150   // transformation in other cases anyway.
13151   SDValue Load;
13152   SDValue Chain;
13153   if (ResultVT.bitsGT(VecEltVT)) {
13154     // If the result type of vextract is wider than the load, then issue an
13155     // extending load instead.
13156     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
13157                                                   VecEltVT)
13158                                    ? ISD::ZEXTLOAD
13159                                    : ISD::EXTLOAD;
13160     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
13161                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
13162                           Align, OriginalLoad->getMemOperand()->getFlags(),
13163                           OriginalLoad->getAAInfo());
13164     Chain = Load.getValue(1);
13165   } else {
13166     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
13167                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
13168                        OriginalLoad->getAAInfo());
13169     Chain = Load.getValue(1);
13170     if (ResultVT.bitsLT(VecEltVT))
13171       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
13172     else
13173       Load = DAG.getBitcast(ResultVT, Load);
13174   }
13175   WorklistRemover DeadNodes(*this);
13176   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
13177   SDValue To[] = { Load, Chain };
13178   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
13179   // Since we're explicitly calling ReplaceAllUses, add the new node to the
13180   // worklist explicitly as well.
13181   AddToWorklist(Load.getNode());
13182   AddUsersToWorklist(Load.getNode()); // Add users too
13183   // Make sure to revisit this node to clean it up; it will usually be dead.
13184   AddToWorklist(EVE);
13185   ++OpsNarrowed;
13186   return SDValue(EVE, 0);
13187 }
13188 
13189 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
13190   // (vextract (scalar_to_vector val, 0) -> val
13191   SDValue InVec = N->getOperand(0);
13192   EVT VT = InVec.getValueType();
13193   EVT NVT = N->getValueType(0);
13194 
13195   if (InVec.isUndef())
13196     return DAG.getUNDEF(NVT);
13197 
13198   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
13199     // Check if the result type doesn't match the inserted element type. A
13200     // SCALAR_TO_VECTOR may truncate the inserted element and the
13201     // EXTRACT_VECTOR_ELT may widen the extracted vector.
13202     SDValue InOp = InVec.getOperand(0);
13203     if (InOp.getValueType() != NVT) {
13204       assert(InOp.getValueType().isInteger() && NVT.isInteger());
13205       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
13206     }
13207     return InOp;
13208   }
13209 
13210   SDValue EltNo = N->getOperand(1);
13211   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
13212 
13213   // extract_vector_elt (build_vector x, y), 1 -> y
13214   if (ConstEltNo &&
13215       InVec.getOpcode() == ISD::BUILD_VECTOR &&
13216       TLI.isTypeLegal(VT) &&
13217       (InVec.hasOneUse() ||
13218        TLI.aggressivelyPreferBuildVectorSources(VT))) {
13219     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
13220     EVT InEltVT = Elt.getValueType();
13221 
13222     // Sometimes build_vector's scalar input types do not match result type.
13223     if (NVT == InEltVT)
13224       return Elt;
13225 
13226     // TODO: It may be useful to truncate if free if the build_vector implicitly
13227     // converts.
13228   }
13229 
13230   // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
13231   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
13232       ConstEltNo->isNullValue() && VT.isInteger()) {
13233     SDValue BCSrc = InVec.getOperand(0);
13234     if (BCSrc.getValueType().isScalarInteger())
13235       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
13236   }
13237 
13238   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
13239   //
13240   // This only really matters if the index is non-constant since other combines
13241   // on the constant elements already work.
13242   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
13243       EltNo == InVec.getOperand(2)) {
13244     SDValue Elt = InVec.getOperand(1);
13245     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
13246   }
13247 
13248   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
13249   // We only perform this optimization before the op legalization phase because
13250   // we may introduce new vector instructions which are not backed by TD
13251   // patterns. For example on AVX, extracting elements from a wide vector
13252   // without using extract_subvector. However, if we can find an underlying
13253   // scalar value, then we can always use that.
13254   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
13255     int NumElem = VT.getVectorNumElements();
13256     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
13257     // Find the new index to extract from.
13258     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
13259 
13260     // Extracting an undef index is undef.
13261     if (OrigElt == -1)
13262       return DAG.getUNDEF(NVT);
13263 
13264     // Select the right vector half to extract from.
13265     SDValue SVInVec;
13266     if (OrigElt < NumElem) {
13267       SVInVec = InVec->getOperand(0);
13268     } else {
13269       SVInVec = InVec->getOperand(1);
13270       OrigElt -= NumElem;
13271     }
13272 
13273     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
13274       SDValue InOp = SVInVec.getOperand(OrigElt);
13275       if (InOp.getValueType() != NVT) {
13276         assert(InOp.getValueType().isInteger() && NVT.isInteger());
13277         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
13278       }
13279 
13280       return InOp;
13281     }
13282 
13283     // FIXME: We should handle recursing on other vector shuffles and
13284     // scalar_to_vector here as well.
13285 
13286     if (!LegalOperations) {
13287       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13288       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
13289                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
13290     }
13291   }
13292 
13293   bool BCNumEltsChanged = false;
13294   EVT ExtVT = VT.getVectorElementType();
13295   EVT LVT = ExtVT;
13296 
13297   // If the result of load has to be truncated, then it's not necessarily
13298   // profitable.
13299   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
13300     return SDValue();
13301 
13302   if (InVec.getOpcode() == ISD::BITCAST) {
13303     // Don't duplicate a load with other uses.
13304     if (!InVec.hasOneUse())
13305       return SDValue();
13306 
13307     EVT BCVT = InVec.getOperand(0).getValueType();
13308     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
13309       return SDValue();
13310     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
13311       BCNumEltsChanged = true;
13312     InVec = InVec.getOperand(0);
13313     ExtVT = BCVT.getVectorElementType();
13314   }
13315 
13316   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
13317   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
13318       ISD::isNormalLoad(InVec.getNode()) &&
13319       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
13320     SDValue Index = N->getOperand(1);
13321     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
13322       if (!OrigLoad->isVolatile()) {
13323         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
13324                                                              OrigLoad);
13325       }
13326     }
13327   }
13328 
13329   // Perform only after legalization to ensure build_vector / vector_shuffle
13330   // optimizations have already been done.
13331   if (!LegalOperations) return SDValue();
13332 
13333   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
13334   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
13335   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
13336 
13337   if (ConstEltNo) {
13338     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13339 
13340     LoadSDNode *LN0 = nullptr;
13341     const ShuffleVectorSDNode *SVN = nullptr;
13342     if (ISD::isNormalLoad(InVec.getNode())) {
13343       LN0 = cast<LoadSDNode>(InVec);
13344     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13345                InVec.getOperand(0).getValueType() == ExtVT &&
13346                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
13347       // Don't duplicate a load with other uses.
13348       if (!InVec.hasOneUse())
13349         return SDValue();
13350 
13351       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
13352     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
13353       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
13354       // =>
13355       // (load $addr+1*size)
13356 
13357       // Don't duplicate a load with other uses.
13358       if (!InVec.hasOneUse())
13359         return SDValue();
13360 
13361       // If the bit convert changed the number of elements, it is unsafe
13362       // to examine the mask.
13363       if (BCNumEltsChanged)
13364         return SDValue();
13365 
13366       // Select the input vector, guarding against out of range extract vector.
13367       unsigned NumElems = VT.getVectorNumElements();
13368       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
13369       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
13370 
13371       if (InVec.getOpcode() == ISD::BITCAST) {
13372         // Don't duplicate a load with other uses.
13373         if (!InVec.hasOneUse())
13374           return SDValue();
13375 
13376         InVec = InVec.getOperand(0);
13377       }
13378       if (ISD::isNormalLoad(InVec.getNode())) {
13379         LN0 = cast<LoadSDNode>(InVec);
13380         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
13381         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
13382       }
13383     }
13384 
13385     // Make sure we found a non-volatile load and the extractelement is
13386     // the only use.
13387     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
13388       return SDValue();
13389 
13390     // If Idx was -1 above, Elt is going to be -1, so just return undef.
13391     if (Elt == -1)
13392       return DAG.getUNDEF(LVT);
13393 
13394     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
13395   }
13396 
13397   return SDValue();
13398 }
13399 
13400 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
13401 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
13402   // We perform this optimization post type-legalization because
13403   // the type-legalizer often scalarizes integer-promoted vectors.
13404   // Performing this optimization before may create bit-casts which
13405   // will be type-legalized to complex code sequences.
13406   // We perform this optimization only before the operation legalizer because we
13407   // may introduce illegal operations.
13408   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
13409     return SDValue();
13410 
13411   unsigned NumInScalars = N->getNumOperands();
13412   SDLoc DL(N);
13413   EVT VT = N->getValueType(0);
13414 
13415   // Check to see if this is a BUILD_VECTOR of a bunch of values
13416   // which come from any_extend or zero_extend nodes. If so, we can create
13417   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
13418   // optimizations. We do not handle sign-extend because we can't fill the sign
13419   // using shuffles.
13420   EVT SourceType = MVT::Other;
13421   bool AllAnyExt = true;
13422 
13423   for (unsigned i = 0; i != NumInScalars; ++i) {
13424     SDValue In = N->getOperand(i);
13425     // Ignore undef inputs.
13426     if (In.isUndef()) continue;
13427 
13428     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
13429     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
13430 
13431     // Abort if the element is not an extension.
13432     if (!ZeroExt && !AnyExt) {
13433       SourceType = MVT::Other;
13434       break;
13435     }
13436 
13437     // The input is a ZeroExt or AnyExt. Check the original type.
13438     EVT InTy = In.getOperand(0).getValueType();
13439 
13440     // Check that all of the widened source types are the same.
13441     if (SourceType == MVT::Other)
13442       // First time.
13443       SourceType = InTy;
13444     else if (InTy != SourceType) {
13445       // Multiple income types. Abort.
13446       SourceType = MVT::Other;
13447       break;
13448     }
13449 
13450     // Check if all of the extends are ANY_EXTENDs.
13451     AllAnyExt &= AnyExt;
13452   }
13453 
13454   // In order to have valid types, all of the inputs must be extended from the
13455   // same source type and all of the inputs must be any or zero extend.
13456   // Scalar sizes must be a power of two.
13457   EVT OutScalarTy = VT.getScalarType();
13458   bool ValidTypes = SourceType != MVT::Other &&
13459                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
13460                  isPowerOf2_32(SourceType.getSizeInBits());
13461 
13462   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
13463   // turn into a single shuffle instruction.
13464   if (!ValidTypes)
13465     return SDValue();
13466 
13467   bool isLE = DAG.getDataLayout().isLittleEndian();
13468   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
13469   assert(ElemRatio > 1 && "Invalid element size ratio");
13470   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
13471                                DAG.getConstant(0, DL, SourceType);
13472 
13473   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
13474   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
13475 
13476   // Populate the new build_vector
13477   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13478     SDValue Cast = N->getOperand(i);
13479     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
13480             Cast.getOpcode() == ISD::ZERO_EXTEND ||
13481             Cast.isUndef()) && "Invalid cast opcode");
13482     SDValue In;
13483     if (Cast.isUndef())
13484       In = DAG.getUNDEF(SourceType);
13485     else
13486       In = Cast->getOperand(0);
13487     unsigned Index = isLE ? (i * ElemRatio) :
13488                             (i * ElemRatio + (ElemRatio - 1));
13489 
13490     assert(Index < Ops.size() && "Invalid index");
13491     Ops[Index] = In;
13492   }
13493 
13494   // The type of the new BUILD_VECTOR node.
13495   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
13496   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
13497          "Invalid vector size");
13498   // Check if the new vector type is legal.
13499   if (!isTypeLegal(VecVT)) return SDValue();
13500 
13501   // Make the new BUILD_VECTOR.
13502   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
13503 
13504   // The new BUILD_VECTOR node has the potential to be further optimized.
13505   AddToWorklist(BV.getNode());
13506   // Bitcast to the desired type.
13507   return DAG.getBitcast(VT, BV);
13508 }
13509 
13510 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
13511   EVT VT = N->getValueType(0);
13512 
13513   unsigned NumInScalars = N->getNumOperands();
13514   SDLoc DL(N);
13515 
13516   EVT SrcVT = MVT::Other;
13517   unsigned Opcode = ISD::DELETED_NODE;
13518   unsigned NumDefs = 0;
13519 
13520   for (unsigned i = 0; i != NumInScalars; ++i) {
13521     SDValue In = N->getOperand(i);
13522     unsigned Opc = In.getOpcode();
13523 
13524     if (Opc == ISD::UNDEF)
13525       continue;
13526 
13527     // If all scalar values are floats and converted from integers.
13528     if (Opcode == ISD::DELETED_NODE &&
13529         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
13530       Opcode = Opc;
13531     }
13532 
13533     if (Opc != Opcode)
13534       return SDValue();
13535 
13536     EVT InVT = In.getOperand(0).getValueType();
13537 
13538     // If all scalar values are typed differently, bail out. It's chosen to
13539     // simplify BUILD_VECTOR of integer types.
13540     if (SrcVT == MVT::Other)
13541       SrcVT = InVT;
13542     if (SrcVT != InVT)
13543       return SDValue();
13544     NumDefs++;
13545   }
13546 
13547   // If the vector has just one element defined, it's not worth to fold it into
13548   // a vectorized one.
13549   if (NumDefs < 2)
13550     return SDValue();
13551 
13552   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
13553          && "Should only handle conversion from integer to float.");
13554   assert(SrcVT != MVT::Other && "Cannot determine source type!");
13555 
13556   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
13557 
13558   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
13559     return SDValue();
13560 
13561   // Just because the floating-point vector type is legal does not necessarily
13562   // mean that the corresponding integer vector type is.
13563   if (!isTypeLegal(NVT))
13564     return SDValue();
13565 
13566   SmallVector<SDValue, 8> Opnds;
13567   for (unsigned i = 0; i != NumInScalars; ++i) {
13568     SDValue In = N->getOperand(i);
13569 
13570     if (In.isUndef())
13571       Opnds.push_back(DAG.getUNDEF(SrcVT));
13572     else
13573       Opnds.push_back(In.getOperand(0));
13574   }
13575   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
13576   AddToWorklist(BV.getNode());
13577 
13578   return DAG.getNode(Opcode, DL, VT, BV);
13579 }
13580 
13581 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
13582                                            ArrayRef<int> VectorMask,
13583                                            SDValue VecIn1, SDValue VecIn2,
13584                                            unsigned LeftIdx) {
13585   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13586   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
13587 
13588   EVT VT = N->getValueType(0);
13589   EVT InVT1 = VecIn1.getValueType();
13590   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
13591 
13592   unsigned Vec2Offset = InVT1.getVectorNumElements();
13593   unsigned NumElems = VT.getVectorNumElements();
13594   unsigned ShuffleNumElems = NumElems;
13595 
13596   // We can't generate a shuffle node with mismatched input and output types.
13597   // Try to make the types match the type of the output.
13598   if (InVT1 != VT || InVT2 != VT) {
13599     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
13600       // If the output vector length is a multiple of both input lengths,
13601       // we can concatenate them and pad the rest with undefs.
13602       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
13603       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
13604       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
13605       ConcatOps[0] = VecIn1;
13606       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
13607       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
13608       VecIn2 = SDValue();
13609     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
13610       if (!TLI.isExtractSubvectorCheap(VT, NumElems))
13611         return SDValue();
13612 
13613       if (!VecIn2.getNode()) {
13614         // If we only have one input vector, and it's twice the size of the
13615         // output, split it in two.
13616         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
13617                              DAG.getConstant(NumElems, DL, IdxTy));
13618         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
13619         // Since we now have shorter input vectors, adjust the offset of the
13620         // second vector's start.
13621         Vec2Offset = NumElems;
13622       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
13623         // VecIn1 is wider than the output, and we have another, possibly
13624         // smaller input. Pad the smaller input with undefs, shuffle at the
13625         // input vector width, and extract the output.
13626         // The shuffle type is different than VT, so check legality again.
13627         if (LegalOperations &&
13628             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
13629           return SDValue();
13630 
13631         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
13632         // lower it back into a BUILD_VECTOR. So if the inserted type is
13633         // illegal, don't even try.
13634         if (InVT1 != InVT2) {
13635           if (!TLI.isTypeLegal(InVT2))
13636             return SDValue();
13637           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
13638                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
13639         }
13640         ShuffleNumElems = NumElems * 2;
13641       } else {
13642         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
13643         // than VecIn1. We can't handle this for now - this case will disappear
13644         // when we start sorting the vectors by type.
13645         return SDValue();
13646       }
13647     } else {
13648       // TODO: Support cases where the length mismatch isn't exactly by a
13649       // factor of 2.
13650       // TODO: Move this check upwards, so that if we have bad type
13651       // mismatches, we don't create any DAG nodes.
13652       return SDValue();
13653     }
13654   }
13655 
13656   // Initialize mask to undef.
13657   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
13658 
13659   // Only need to run up to the number of elements actually used, not the
13660   // total number of elements in the shuffle - if we are shuffling a wider
13661   // vector, the high lanes should be set to undef.
13662   for (unsigned i = 0; i != NumElems; ++i) {
13663     if (VectorMask[i] <= 0)
13664       continue;
13665 
13666     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
13667     if (VectorMask[i] == (int)LeftIdx) {
13668       Mask[i] = ExtIndex;
13669     } else if (VectorMask[i] == (int)LeftIdx + 1) {
13670       Mask[i] = Vec2Offset + ExtIndex;
13671     }
13672   }
13673 
13674   // The type the input vectors may have changed above.
13675   InVT1 = VecIn1.getValueType();
13676 
13677   // If we already have a VecIn2, it should have the same type as VecIn1.
13678   // If we don't, get an undef/zero vector of the appropriate type.
13679   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
13680   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
13681 
13682   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
13683   if (ShuffleNumElems > NumElems)
13684     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
13685 
13686   return Shuffle;
13687 }
13688 
13689 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
13690 // operations. If the types of the vectors we're extracting from allow it,
13691 // turn this into a vector_shuffle node.
13692 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
13693   SDLoc DL(N);
13694   EVT VT = N->getValueType(0);
13695 
13696   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
13697   if (!isTypeLegal(VT))
13698     return SDValue();
13699 
13700   // May only combine to shuffle after legalize if shuffle is legal.
13701   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
13702     return SDValue();
13703 
13704   bool UsesZeroVector = false;
13705   unsigned NumElems = N->getNumOperands();
13706 
13707   // Record, for each element of the newly built vector, which input vector
13708   // that element comes from. -1 stands for undef, 0 for the zero vector,
13709   // and positive values for the input vectors.
13710   // VectorMask maps each element to its vector number, and VecIn maps vector
13711   // numbers to their initial SDValues.
13712 
13713   SmallVector<int, 8> VectorMask(NumElems, -1);
13714   SmallVector<SDValue, 8> VecIn;
13715   VecIn.push_back(SDValue());
13716 
13717   for (unsigned i = 0; i != NumElems; ++i) {
13718     SDValue Op = N->getOperand(i);
13719 
13720     if (Op.isUndef())
13721       continue;
13722 
13723     // See if we can use a blend with a zero vector.
13724     // TODO: Should we generalize this to a blend with an arbitrary constant
13725     // vector?
13726     if (isNullConstant(Op) || isNullFPConstant(Op)) {
13727       UsesZeroVector = true;
13728       VectorMask[i] = 0;
13729       continue;
13730     }
13731 
13732     // Not an undef or zero. If the input is something other than an
13733     // EXTRACT_VECTOR_ELT with a constant index, bail out.
13734     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13735         !isa<ConstantSDNode>(Op.getOperand(1)))
13736       return SDValue();
13737 
13738     SDValue ExtractedFromVec = Op.getOperand(0);
13739 
13740     // All inputs must have the same element type as the output.
13741     if (VT.getVectorElementType() !=
13742         ExtractedFromVec.getValueType().getVectorElementType())
13743       return SDValue();
13744 
13745     // Have we seen this input vector before?
13746     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
13747     // a map back from SDValues to numbers isn't worth it.
13748     unsigned Idx = std::distance(
13749         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
13750     if (Idx == VecIn.size())
13751       VecIn.push_back(ExtractedFromVec);
13752 
13753     VectorMask[i] = Idx;
13754   }
13755 
13756   // If we didn't find at least one input vector, bail out.
13757   if (VecIn.size() < 2)
13758     return SDValue();
13759 
13760   // TODO: We want to sort the vectors by descending length, so that adjacent
13761   // pairs have similar length, and the longer vector is always first in the
13762   // pair.
13763 
13764   // TODO: Should this fire if some of the input vectors has illegal type (like
13765   // it does now), or should we let legalization run its course first?
13766 
13767   // Shuffle phase:
13768   // Take pairs of vectors, and shuffle them so that the result has elements
13769   // from these vectors in the correct places.
13770   // For example, given:
13771   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
13772   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
13773   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
13774   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
13775   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
13776   // We will generate:
13777   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
13778   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
13779   SmallVector<SDValue, 4> Shuffles;
13780   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
13781     unsigned LeftIdx = 2 * In + 1;
13782     SDValue VecLeft = VecIn[LeftIdx];
13783     SDValue VecRight =
13784         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
13785 
13786     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
13787                                                 VecRight, LeftIdx))
13788       Shuffles.push_back(Shuffle);
13789     else
13790       return SDValue();
13791   }
13792 
13793   // If we need the zero vector as an "ingredient" in the blend tree, add it
13794   // to the list of shuffles.
13795   if (UsesZeroVector)
13796     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
13797                                       : DAG.getConstantFP(0.0, DL, VT));
13798 
13799   // If we only have one shuffle, we're done.
13800   if (Shuffles.size() == 1)
13801     return Shuffles[0];
13802 
13803   // Update the vector mask to point to the post-shuffle vectors.
13804   for (int &Vec : VectorMask)
13805     if (Vec == 0)
13806       Vec = Shuffles.size() - 1;
13807     else
13808       Vec = (Vec - 1) / 2;
13809 
13810   // More than one shuffle. Generate a binary tree of blends, e.g. if from
13811   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
13812   // generate:
13813   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
13814   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
13815   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
13816   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
13817   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
13818   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
13819   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
13820 
13821   // Make sure the initial size of the shuffle list is even.
13822   if (Shuffles.size() % 2)
13823     Shuffles.push_back(DAG.getUNDEF(VT));
13824 
13825   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
13826     if (CurSize % 2) {
13827       Shuffles[CurSize] = DAG.getUNDEF(VT);
13828       CurSize++;
13829     }
13830     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
13831       int Left = 2 * In;
13832       int Right = 2 * In + 1;
13833       SmallVector<int, 8> Mask(NumElems, -1);
13834       for (unsigned i = 0; i != NumElems; ++i) {
13835         if (VectorMask[i] == Left) {
13836           Mask[i] = i;
13837           VectorMask[i] = In;
13838         } else if (VectorMask[i] == Right) {
13839           Mask[i] = i + NumElems;
13840           VectorMask[i] = In;
13841         }
13842       }
13843 
13844       Shuffles[In] =
13845           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
13846     }
13847   }
13848 
13849   return Shuffles[0];
13850 }
13851 
13852 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
13853   EVT VT = N->getValueType(0);
13854 
13855   // A vector built entirely of undefs is undef.
13856   if (ISD::allOperandsUndef(N))
13857     return DAG.getUNDEF(VT);
13858 
13859   // Check if we can express BUILD VECTOR via subvector extract.
13860   if (!LegalTypes && (N->getNumOperands() > 1)) {
13861     SDValue Op0 = N->getOperand(0);
13862     auto checkElem = [&](SDValue Op) -> uint64_t {
13863       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
13864           (Op0.getOperand(0) == Op.getOperand(0)))
13865         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
13866           return CNode->getZExtValue();
13867       return -1;
13868     };
13869 
13870     int Offset = checkElem(Op0);
13871     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
13872       if (Offset + i != checkElem(N->getOperand(i))) {
13873         Offset = -1;
13874         break;
13875       }
13876     }
13877 
13878     if ((Offset == 0) &&
13879         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
13880       return Op0.getOperand(0);
13881     if ((Offset != -1) &&
13882         ((Offset % N->getValueType(0).getVectorNumElements()) ==
13883          0)) // IDX must be multiple of output size.
13884       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
13885                          Op0.getOperand(0), Op0.getOperand(1));
13886   }
13887 
13888   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
13889     return V;
13890 
13891   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
13892     return V;
13893 
13894   if (SDValue V = reduceBuildVecToShuffle(N))
13895     return V;
13896 
13897   return SDValue();
13898 }
13899 
13900 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
13901   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13902   EVT OpVT = N->getOperand(0).getValueType();
13903 
13904   // If the operands are legal vectors, leave them alone.
13905   if (TLI.isTypeLegal(OpVT))
13906     return SDValue();
13907 
13908   SDLoc DL(N);
13909   EVT VT = N->getValueType(0);
13910   SmallVector<SDValue, 8> Ops;
13911 
13912   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
13913   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13914 
13915   // Keep track of what we encounter.
13916   bool AnyInteger = false;
13917   bool AnyFP = false;
13918   for (const SDValue &Op : N->ops()) {
13919     if (ISD::BITCAST == Op.getOpcode() &&
13920         !Op.getOperand(0).getValueType().isVector())
13921       Ops.push_back(Op.getOperand(0));
13922     else if (ISD::UNDEF == Op.getOpcode())
13923       Ops.push_back(ScalarUndef);
13924     else
13925       return SDValue();
13926 
13927     // Note whether we encounter an integer or floating point scalar.
13928     // If it's neither, bail out, it could be something weird like x86mmx.
13929     EVT LastOpVT = Ops.back().getValueType();
13930     if (LastOpVT.isFloatingPoint())
13931       AnyFP = true;
13932     else if (LastOpVT.isInteger())
13933       AnyInteger = true;
13934     else
13935       return SDValue();
13936   }
13937 
13938   // If any of the operands is a floating point scalar bitcast to a vector,
13939   // use floating point types throughout, and bitcast everything.
13940   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
13941   if (AnyFP) {
13942     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
13943     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13944     if (AnyInteger) {
13945       for (SDValue &Op : Ops) {
13946         if (Op.getValueType() == SVT)
13947           continue;
13948         if (Op.isUndef())
13949           Op = ScalarUndef;
13950         else
13951           Op = DAG.getBitcast(SVT, Op);
13952       }
13953     }
13954   }
13955 
13956   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
13957                                VT.getSizeInBits() / SVT.getSizeInBits());
13958   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
13959 }
13960 
13961 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
13962 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
13963 // most two distinct vectors the same size as the result, attempt to turn this
13964 // into a legal shuffle.
13965 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
13966   EVT VT = N->getValueType(0);
13967   EVT OpVT = N->getOperand(0).getValueType();
13968   int NumElts = VT.getVectorNumElements();
13969   int NumOpElts = OpVT.getVectorNumElements();
13970 
13971   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
13972   SmallVector<int, 8> Mask;
13973 
13974   for (SDValue Op : N->ops()) {
13975     // Peek through any bitcast.
13976     while (Op.getOpcode() == ISD::BITCAST)
13977       Op = Op.getOperand(0);
13978 
13979     // UNDEF nodes convert to UNDEF shuffle mask values.
13980     if (Op.isUndef()) {
13981       Mask.append((unsigned)NumOpElts, -1);
13982       continue;
13983     }
13984 
13985     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13986       return SDValue();
13987 
13988     // What vector are we extracting the subvector from and at what index?
13989     SDValue ExtVec = Op.getOperand(0);
13990 
13991     // We want the EVT of the original extraction to correctly scale the
13992     // extraction index.
13993     EVT ExtVT = ExtVec.getValueType();
13994 
13995     // Peek through any bitcast.
13996     while (ExtVec.getOpcode() == ISD::BITCAST)
13997       ExtVec = ExtVec.getOperand(0);
13998 
13999     // UNDEF nodes convert to UNDEF shuffle mask values.
14000     if (ExtVec.isUndef()) {
14001       Mask.append((unsigned)NumOpElts, -1);
14002       continue;
14003     }
14004 
14005     if (!isa<ConstantSDNode>(Op.getOperand(1)))
14006       return SDValue();
14007     int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
14008 
14009     // Ensure that we are extracting a subvector from a vector the same
14010     // size as the result.
14011     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
14012       return SDValue();
14013 
14014     // Scale the subvector index to account for any bitcast.
14015     int NumExtElts = ExtVT.getVectorNumElements();
14016     if (0 == (NumExtElts % NumElts))
14017       ExtIdx /= (NumExtElts / NumElts);
14018     else if (0 == (NumElts % NumExtElts))
14019       ExtIdx *= (NumElts / NumExtElts);
14020     else
14021       return SDValue();
14022 
14023     // At most we can reference 2 inputs in the final shuffle.
14024     if (SV0.isUndef() || SV0 == ExtVec) {
14025       SV0 = ExtVec;
14026       for (int i = 0; i != NumOpElts; ++i)
14027         Mask.push_back(i + ExtIdx);
14028     } else if (SV1.isUndef() || SV1 == ExtVec) {
14029       SV1 = ExtVec;
14030       for (int i = 0; i != NumOpElts; ++i)
14031         Mask.push_back(i + ExtIdx + NumElts);
14032     } else {
14033       return SDValue();
14034     }
14035   }
14036 
14037   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
14038     return SDValue();
14039 
14040   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
14041                               DAG.getBitcast(VT, SV1), Mask);
14042 }
14043 
14044 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
14045   // If we only have one input vector, we don't need to do any concatenation.
14046   if (N->getNumOperands() == 1)
14047     return N->getOperand(0);
14048 
14049   // Check if all of the operands are undefs.
14050   EVT VT = N->getValueType(0);
14051   if (ISD::allOperandsUndef(N))
14052     return DAG.getUNDEF(VT);
14053 
14054   // Optimize concat_vectors where all but the first of the vectors are undef.
14055   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
14056         return Op.isUndef();
14057       })) {
14058     SDValue In = N->getOperand(0);
14059     assert(In.getValueType().isVector() && "Must concat vectors");
14060 
14061     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
14062     if (In->getOpcode() == ISD::BITCAST &&
14063         !In->getOperand(0)->getValueType(0).isVector()) {
14064       SDValue Scalar = In->getOperand(0);
14065 
14066       // If the bitcast type isn't legal, it might be a trunc of a legal type;
14067       // look through the trunc so we can still do the transform:
14068       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
14069       if (Scalar->getOpcode() == ISD::TRUNCATE &&
14070           !TLI.isTypeLegal(Scalar.getValueType()) &&
14071           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
14072         Scalar = Scalar->getOperand(0);
14073 
14074       EVT SclTy = Scalar->getValueType(0);
14075 
14076       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
14077         return SDValue();
14078 
14079       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
14080       if (VNTNumElms < 2)
14081         return SDValue();
14082 
14083       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
14084       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
14085         return SDValue();
14086 
14087       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
14088       return DAG.getBitcast(VT, Res);
14089     }
14090   }
14091 
14092   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
14093   // We have already tested above for an UNDEF only concatenation.
14094   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
14095   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
14096   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
14097     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
14098   };
14099   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
14100     SmallVector<SDValue, 8> Opnds;
14101     EVT SVT = VT.getScalarType();
14102 
14103     EVT MinVT = SVT;
14104     if (!SVT.isFloatingPoint()) {
14105       // If BUILD_VECTOR are from built from integer, they may have different
14106       // operand types. Get the smallest type and truncate all operands to it.
14107       bool FoundMinVT = false;
14108       for (const SDValue &Op : N->ops())
14109         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14110           EVT OpSVT = Op.getOperand(0)->getValueType(0);
14111           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
14112           FoundMinVT = true;
14113         }
14114       assert(FoundMinVT && "Concat vector type mismatch");
14115     }
14116 
14117     for (const SDValue &Op : N->ops()) {
14118       EVT OpVT = Op.getValueType();
14119       unsigned NumElts = OpVT.getVectorNumElements();
14120 
14121       if (ISD::UNDEF == Op.getOpcode())
14122         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
14123 
14124       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
14125         if (SVT.isFloatingPoint()) {
14126           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
14127           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
14128         } else {
14129           for (unsigned i = 0; i != NumElts; ++i)
14130             Opnds.push_back(
14131                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
14132         }
14133       }
14134     }
14135 
14136     assert(VT.getVectorNumElements() == Opnds.size() &&
14137            "Concat vector type mismatch");
14138     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14139   }
14140 
14141   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
14142   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
14143     return V;
14144 
14145   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
14146   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
14147     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
14148       return V;
14149 
14150   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
14151   // nodes often generate nop CONCAT_VECTOR nodes.
14152   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
14153   // place the incoming vectors at the exact same location.
14154   SDValue SingleSource = SDValue();
14155   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
14156 
14157   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14158     SDValue Op = N->getOperand(i);
14159 
14160     if (Op.isUndef())
14161       continue;
14162 
14163     // Check if this is the identity extract:
14164     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14165       return SDValue();
14166 
14167     // Find the single incoming vector for the extract_subvector.
14168     if (SingleSource.getNode()) {
14169       if (Op.getOperand(0) != SingleSource)
14170         return SDValue();
14171     } else {
14172       SingleSource = Op.getOperand(0);
14173 
14174       // Check the source type is the same as the type of the result.
14175       // If not, this concat may extend the vector, so we can not
14176       // optimize it away.
14177       if (SingleSource.getValueType() != N->getValueType(0))
14178         return SDValue();
14179     }
14180 
14181     unsigned IdentityIndex = i * PartNumElem;
14182     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
14183     // The extract index must be constant.
14184     if (!CS)
14185       return SDValue();
14186 
14187     // Check that we are reading from the identity index.
14188     if (CS->getZExtValue() != IdentityIndex)
14189       return SDValue();
14190   }
14191 
14192   if (SingleSource.getNode())
14193     return SingleSource;
14194 
14195   return SDValue();
14196 }
14197 
14198 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
14199   EVT NVT = N->getValueType(0);
14200   SDValue V = N->getOperand(0);
14201 
14202   // Extract from UNDEF is UNDEF.
14203   if (V.isUndef())
14204     return DAG.getUNDEF(NVT);
14205 
14206   // Combine:
14207   //    (extract_subvec (concat V1, V2, ...), i)
14208   // Into:
14209   //    Vi if possible
14210   // Only operand 0 is checked as 'concat' assumes all inputs of the same
14211   // type.
14212   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
14213       isa<ConstantSDNode>(N->getOperand(1)) &&
14214       V->getOperand(0).getValueType() == NVT) {
14215     unsigned Idx = N->getConstantOperandVal(1);
14216     unsigned NumElems = NVT.getVectorNumElements();
14217     assert((Idx % NumElems) == 0 &&
14218            "IDX in concat is not a multiple of the result vector length.");
14219     return V->getOperand(Idx / NumElems);
14220   }
14221 
14222   // Skip bitcasting
14223   if (V->getOpcode() == ISD::BITCAST)
14224     V = V.getOperand(0);
14225 
14226   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
14227     // Handle only simple case where vector being inserted and vector
14228     // being extracted are of same size.
14229     EVT SmallVT = V->getOperand(1).getValueType();
14230     if (!NVT.bitsEq(SmallVT))
14231       return SDValue();
14232 
14233     // Only handle cases where both indexes are constants.
14234     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
14235     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
14236 
14237     if (InsIdx && ExtIdx) {
14238       // Combine:
14239       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
14240       // Into:
14241       //    indices are equal or bit offsets are equal => V1
14242       //    otherwise => (extract_subvec V1, ExtIdx)
14243       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
14244           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
14245         return DAG.getBitcast(NVT, V->getOperand(1));
14246       return DAG.getNode(
14247           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
14248           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
14249           N->getOperand(1));
14250     }
14251   }
14252 
14253   return SDValue();
14254 }
14255 
14256 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
14257                                                  SDValue V, SelectionDAG &DAG) {
14258   SDLoc DL(V);
14259   EVT VT = V.getValueType();
14260 
14261   switch (V.getOpcode()) {
14262   default:
14263     return V;
14264 
14265   case ISD::CONCAT_VECTORS: {
14266     EVT OpVT = V->getOperand(0).getValueType();
14267     int OpSize = OpVT.getVectorNumElements();
14268     SmallBitVector OpUsedElements(OpSize, false);
14269     bool FoundSimplification = false;
14270     SmallVector<SDValue, 4> NewOps;
14271     NewOps.reserve(V->getNumOperands());
14272     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
14273       SDValue Op = V->getOperand(i);
14274       bool OpUsed = false;
14275       for (int j = 0; j < OpSize; ++j)
14276         if (UsedElements[i * OpSize + j]) {
14277           OpUsedElements[j] = true;
14278           OpUsed = true;
14279         }
14280       NewOps.push_back(
14281           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
14282                  : DAG.getUNDEF(OpVT));
14283       FoundSimplification |= Op == NewOps.back();
14284       OpUsedElements.reset();
14285     }
14286     if (FoundSimplification)
14287       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
14288     return V;
14289   }
14290 
14291   case ISD::INSERT_SUBVECTOR: {
14292     SDValue BaseV = V->getOperand(0);
14293     SDValue SubV = V->getOperand(1);
14294     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
14295     if (!IdxN)
14296       return V;
14297 
14298     int SubSize = SubV.getValueType().getVectorNumElements();
14299     int Idx = IdxN->getZExtValue();
14300     bool SubVectorUsed = false;
14301     SmallBitVector SubUsedElements(SubSize, false);
14302     for (int i = 0; i < SubSize; ++i)
14303       if (UsedElements[i + Idx]) {
14304         SubVectorUsed = true;
14305         SubUsedElements[i] = true;
14306         UsedElements[i + Idx] = false;
14307       }
14308 
14309     // Now recurse on both the base and sub vectors.
14310     SDValue SimplifiedSubV =
14311         SubVectorUsed
14312             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
14313             : DAG.getUNDEF(SubV.getValueType());
14314     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
14315     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
14316       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
14317                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
14318     return V;
14319   }
14320   }
14321 }
14322 
14323 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
14324                                        SDValue N1, SelectionDAG &DAG) {
14325   EVT VT = SVN->getValueType(0);
14326   int NumElts = VT.getVectorNumElements();
14327   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
14328   for (int M : SVN->getMask())
14329     if (M >= 0 && M < NumElts)
14330       N0UsedElements[M] = true;
14331     else if (M >= NumElts)
14332       N1UsedElements[M - NumElts] = true;
14333 
14334   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
14335   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
14336   if (S0 == N0 && S1 == N1)
14337     return SDValue();
14338 
14339   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
14340 }
14341 
14342 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
14343 // or turn a shuffle of a single concat into simpler shuffle then concat.
14344 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
14345   EVT VT = N->getValueType(0);
14346   unsigned NumElts = VT.getVectorNumElements();
14347 
14348   SDValue N0 = N->getOperand(0);
14349   SDValue N1 = N->getOperand(1);
14350   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
14351 
14352   SmallVector<SDValue, 4> Ops;
14353   EVT ConcatVT = N0.getOperand(0).getValueType();
14354   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
14355   unsigned NumConcats = NumElts / NumElemsPerConcat;
14356 
14357   // Special case: shuffle(concat(A,B)) can be more efficiently represented
14358   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
14359   // half vector elements.
14360   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
14361       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
14362                   SVN->getMask().end(), [](int i) { return i == -1; })) {
14363     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
14364                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
14365     N1 = DAG.getUNDEF(ConcatVT);
14366     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
14367   }
14368 
14369   // Look at every vector that's inserted. We're looking for exact
14370   // subvector-sized copies from a concatenated vector
14371   for (unsigned I = 0; I != NumConcats; ++I) {
14372     // Make sure we're dealing with a copy.
14373     unsigned Begin = I * NumElemsPerConcat;
14374     bool AllUndef = true, NoUndef = true;
14375     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
14376       if (SVN->getMaskElt(J) >= 0)
14377         AllUndef = false;
14378       else
14379         NoUndef = false;
14380     }
14381 
14382     if (NoUndef) {
14383       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
14384         return SDValue();
14385 
14386       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
14387         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
14388           return SDValue();
14389 
14390       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
14391       if (FirstElt < N0.getNumOperands())
14392         Ops.push_back(N0.getOperand(FirstElt));
14393       else
14394         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
14395 
14396     } else if (AllUndef) {
14397       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
14398     } else { // Mixed with general masks and undefs, can't do optimization.
14399       return SDValue();
14400     }
14401   }
14402 
14403   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
14404 }
14405 
14406 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
14407 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
14408 //
14409 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
14410 // a simplification in some sense, but it isn't appropriate in general: some
14411 // BUILD_VECTORs are substantially cheaper than others. The general case
14412 // of a BUILD_VECTOR requires inserting each element individually (or
14413 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
14414 // all constants is a single constant pool load.  A BUILD_VECTOR where each
14415 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
14416 // are undef lowers to a small number of element insertions.
14417 //
14418 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
14419 // We don't fold shuffles where one side is a non-zero constant, and we don't
14420 // fold shuffles if the resulting BUILD_VECTOR would have duplicate
14421 // non-constant operands. This seems to work out reasonably well in practice.
14422 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
14423                                        SelectionDAG &DAG,
14424                                        const TargetLowering &TLI) {
14425   EVT VT = SVN->getValueType(0);
14426   unsigned NumElts = VT.getVectorNumElements();
14427   SDValue N0 = SVN->getOperand(0);
14428   SDValue N1 = SVN->getOperand(1);
14429 
14430   if (!N0->hasOneUse() || !N1->hasOneUse())
14431     return SDValue();
14432   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
14433   // discussed above.
14434   if (!N1.isUndef()) {
14435     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
14436     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
14437     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
14438       return SDValue();
14439     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
14440       return SDValue();
14441   }
14442 
14443   SmallVector<SDValue, 8> Ops;
14444   SmallSet<SDValue, 16> DuplicateOps;
14445   for (int M : SVN->getMask()) {
14446     SDValue Op = DAG.getUNDEF(VT.getScalarType());
14447     if (M >= 0) {
14448       int Idx = M < (int)NumElts ? M : M - NumElts;
14449       SDValue &S = (M < (int)NumElts ? N0 : N1);
14450       if (S.getOpcode() == ISD::BUILD_VECTOR) {
14451         Op = S.getOperand(Idx);
14452       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
14453         if (Idx == 0)
14454           Op = S.getOperand(0);
14455       } else {
14456         // Operand can't be combined - bail out.
14457         return SDValue();
14458       }
14459     }
14460 
14461     // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
14462     // fine, but it's likely to generate low-quality code if the target can't
14463     // reconstruct an appropriate shuffle.
14464     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
14465       if (!DuplicateOps.insert(Op).second)
14466         return SDValue();
14467 
14468     Ops.push_back(Op);
14469   }
14470   // BUILD_VECTOR requires all inputs to be of the same type, find the
14471   // maximum type and extend them all.
14472   EVT SVT = VT.getScalarType();
14473   if (SVT.isInteger())
14474     for (SDValue &Op : Ops)
14475       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
14476   if (SVT != VT.getScalarType())
14477     for (SDValue &Op : Ops)
14478       Op = TLI.isZExtFree(Op.getValueType(), SVT)
14479                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
14480                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
14481   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
14482 }
14483 
14484 // Match shuffles that can be converted to any_vector_extend_in_reg.
14485 // This is often generated during legalization.
14486 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
14487 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
14488 SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
14489                                      SelectionDAG &DAG,
14490                                      const TargetLowering &TLI,
14491                                      bool LegalOperations) {
14492   EVT VT = SVN->getValueType(0);
14493   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
14494 
14495   // TODO Add support for big-endian when we have a test case.
14496   if (!VT.isInteger() || IsBigEndian)
14497     return SDValue();
14498 
14499   unsigned NumElts = VT.getVectorNumElements();
14500   unsigned EltSizeInBits = VT.getScalarSizeInBits();
14501   ArrayRef<int> Mask = SVN->getMask();
14502   SDValue N0 = SVN->getOperand(0);
14503 
14504   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
14505   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
14506     for (unsigned i = 0; i != NumElts; ++i) {
14507       if (Mask[i] < 0)
14508         continue;
14509       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
14510         continue;
14511       return false;
14512     }
14513     return true;
14514   };
14515 
14516   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
14517   // power-of-2 extensions as they are the most likely.
14518   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
14519     if (!isAnyExtend(Scale))
14520       continue;
14521 
14522     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
14523     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
14524     if (!LegalOperations ||
14525         TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
14526       return DAG.getBitcast(VT,
14527                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
14528   }
14529 
14530   return SDValue();
14531 }
14532 
14533 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
14534 // each source element of a large type into the lowest elements of a smaller
14535 // destination type. This is often generated during legalization.
14536 // If the source node itself was a '*_extend_vector_inreg' node then we should
14537 // then be able to remove it.
14538 SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
14539   EVT VT = SVN->getValueType(0);
14540   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
14541 
14542   // TODO Add support for big-endian when we have a test case.
14543   if (!VT.isInteger() || IsBigEndian)
14544     return SDValue();
14545 
14546   SDValue N0 = SVN->getOperand(0);
14547   while (N0.getOpcode() == ISD::BITCAST)
14548     N0 = N0.getOperand(0);
14549 
14550   unsigned Opcode = N0.getOpcode();
14551   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
14552       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
14553       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
14554     return SDValue();
14555 
14556   SDValue N00 = N0.getOperand(0);
14557   ArrayRef<int> Mask = SVN->getMask();
14558   unsigned NumElts = VT.getVectorNumElements();
14559   unsigned EltSizeInBits = VT.getScalarSizeInBits();
14560   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
14561 
14562   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
14563   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
14564   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
14565   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
14566     for (unsigned i = 0; i != NumElts; ++i) {
14567       if (Mask[i] < 0)
14568         continue;
14569       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
14570         continue;
14571       return false;
14572     }
14573     return true;
14574   };
14575 
14576   // At the moment we just handle the case where we've truncated back to the
14577   // same size as before the extension.
14578   // TODO: handle more extension/truncation cases as cases arise.
14579   if (EltSizeInBits != ExtSrcSizeInBits)
14580     return SDValue();
14581 
14582   // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
14583   // power-of-2 truncations as they are the most likely.
14584   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
14585     if (isTruncate(Scale))
14586       return DAG.getBitcast(VT, N00);
14587 
14588   return SDValue();
14589 }
14590 
14591 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
14592   EVT VT = N->getValueType(0);
14593   unsigned NumElts = VT.getVectorNumElements();
14594 
14595   SDValue N0 = N->getOperand(0);
14596   SDValue N1 = N->getOperand(1);
14597 
14598   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
14599 
14600   // Canonicalize shuffle undef, undef -> undef
14601   if (N0.isUndef() && N1.isUndef())
14602     return DAG.getUNDEF(VT);
14603 
14604   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
14605 
14606   // Canonicalize shuffle v, v -> v, undef
14607   if (N0 == N1) {
14608     SmallVector<int, 8> NewMask;
14609     for (unsigned i = 0; i != NumElts; ++i) {
14610       int Idx = SVN->getMaskElt(i);
14611       if (Idx >= (int)NumElts) Idx -= NumElts;
14612       NewMask.push_back(Idx);
14613     }
14614     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
14615   }
14616 
14617   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
14618   if (N0.isUndef())
14619     return DAG.getCommutedVectorShuffle(*SVN);
14620 
14621   // Remove references to rhs if it is undef
14622   if (N1.isUndef()) {
14623     bool Changed = false;
14624     SmallVector<int, 8> NewMask;
14625     for (unsigned i = 0; i != NumElts; ++i) {
14626       int Idx = SVN->getMaskElt(i);
14627       if (Idx >= (int)NumElts) {
14628         Idx = -1;
14629         Changed = true;
14630       }
14631       NewMask.push_back(Idx);
14632     }
14633     if (Changed)
14634       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
14635   }
14636 
14637   // If it is a splat, check if the argument vector is another splat or a
14638   // build_vector.
14639   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
14640     SDNode *V = N0.getNode();
14641 
14642     // If this is a bit convert that changes the element type of the vector but
14643     // not the number of vector elements, look through it.  Be careful not to
14644     // look though conversions that change things like v4f32 to v2f64.
14645     if (V->getOpcode() == ISD::BITCAST) {
14646       SDValue ConvInput = V->getOperand(0);
14647       if (ConvInput.getValueType().isVector() &&
14648           ConvInput.getValueType().getVectorNumElements() == NumElts)
14649         V = ConvInput.getNode();
14650     }
14651 
14652     if (V->getOpcode() == ISD::BUILD_VECTOR) {
14653       assert(V->getNumOperands() == NumElts &&
14654              "BUILD_VECTOR has wrong number of operands");
14655       SDValue Base;
14656       bool AllSame = true;
14657       for (unsigned i = 0; i != NumElts; ++i) {
14658         if (!V->getOperand(i).isUndef()) {
14659           Base = V->getOperand(i);
14660           break;
14661         }
14662       }
14663       // Splat of <u, u, u, u>, return <u, u, u, u>
14664       if (!Base.getNode())
14665         return N0;
14666       for (unsigned i = 0; i != NumElts; ++i) {
14667         if (V->getOperand(i) != Base) {
14668           AllSame = false;
14669           break;
14670         }
14671       }
14672       // Splat of <x, x, x, x>, return <x, x, x, x>
14673       if (AllSame)
14674         return N0;
14675 
14676       // Canonicalize any other splat as a build_vector.
14677       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
14678       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
14679       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
14680 
14681       // We may have jumped through bitcasts, so the type of the
14682       // BUILD_VECTOR may not match the type of the shuffle.
14683       if (V->getValueType(0) != VT)
14684         NewBV = DAG.getBitcast(VT, NewBV);
14685       return NewBV;
14686     }
14687   }
14688 
14689   // There are various patterns used to build up a vector from smaller vectors,
14690   // subvectors, or elements. Scan chains of these and replace unused insertions
14691   // or components with undef.
14692   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
14693     return S;
14694 
14695   // Match shuffles that can be converted to any_vector_extend_in_reg.
14696   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
14697     return V;
14698 
14699   // Combine "truncate_vector_in_reg" style shuffles.
14700   if (SDValue V = combineTruncationShuffle(SVN, DAG))
14701     return V;
14702 
14703   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
14704       Level < AfterLegalizeVectorOps &&
14705       (N1.isUndef() ||
14706       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
14707        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
14708     if (SDValue V = partitionShuffleOfConcats(N, DAG))
14709       return V;
14710   }
14711 
14712   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
14713   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
14714   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
14715     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
14716       return Res;
14717 
14718   // If this shuffle only has a single input that is a bitcasted shuffle,
14719   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
14720   // back to their original types.
14721   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14722       N1.isUndef() && Level < AfterLegalizeVectorOps &&
14723       TLI.isTypeLegal(VT)) {
14724 
14725     // Peek through the bitcast only if there is one user.
14726     SDValue BC0 = N0;
14727     while (BC0.getOpcode() == ISD::BITCAST) {
14728       if (!BC0.hasOneUse())
14729         break;
14730       BC0 = BC0.getOperand(0);
14731     }
14732 
14733     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
14734       if (Scale == 1)
14735         return SmallVector<int, 8>(Mask.begin(), Mask.end());
14736 
14737       SmallVector<int, 8> NewMask;
14738       for (int M : Mask)
14739         for (int s = 0; s != Scale; ++s)
14740           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
14741       return NewMask;
14742     };
14743 
14744     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
14745       EVT SVT = VT.getScalarType();
14746       EVT InnerVT = BC0->getValueType(0);
14747       EVT InnerSVT = InnerVT.getScalarType();
14748 
14749       // Determine which shuffle works with the smaller scalar type.
14750       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
14751       EVT ScaleSVT = ScaleVT.getScalarType();
14752 
14753       if (TLI.isTypeLegal(ScaleVT) &&
14754           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
14755           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
14756 
14757         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
14758         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
14759 
14760         // Scale the shuffle masks to the smaller scalar type.
14761         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
14762         SmallVector<int, 8> InnerMask =
14763             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
14764         SmallVector<int, 8> OuterMask =
14765             ScaleShuffleMask(SVN->getMask(), OuterScale);
14766 
14767         // Merge the shuffle masks.
14768         SmallVector<int, 8> NewMask;
14769         for (int M : OuterMask)
14770           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
14771 
14772         // Test for shuffle mask legality over both commutations.
14773         SDValue SV0 = BC0->getOperand(0);
14774         SDValue SV1 = BC0->getOperand(1);
14775         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
14776         if (!LegalMask) {
14777           std::swap(SV0, SV1);
14778           ShuffleVectorSDNode::commuteMask(NewMask);
14779           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
14780         }
14781 
14782         if (LegalMask) {
14783           SV0 = DAG.getBitcast(ScaleVT, SV0);
14784           SV1 = DAG.getBitcast(ScaleVT, SV1);
14785           return DAG.getBitcast(
14786               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
14787         }
14788       }
14789     }
14790   }
14791 
14792   // Canonicalize shuffles according to rules:
14793   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
14794   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
14795   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
14796   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
14797       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
14798       TLI.isTypeLegal(VT)) {
14799     // The incoming shuffle must be of the same type as the result of the
14800     // current shuffle.
14801     assert(N1->getOperand(0).getValueType() == VT &&
14802            "Shuffle types don't match");
14803 
14804     SDValue SV0 = N1->getOperand(0);
14805     SDValue SV1 = N1->getOperand(1);
14806     bool HasSameOp0 = N0 == SV0;
14807     bool IsSV1Undef = SV1.isUndef();
14808     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
14809       // Commute the operands of this shuffle so that next rule
14810       // will trigger.
14811       return DAG.getCommutedVectorShuffle(*SVN);
14812   }
14813 
14814   // Try to fold according to rules:
14815   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
14816   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
14817   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
14818   // Don't try to fold shuffles with illegal type.
14819   // Only fold if this shuffle is the only user of the other shuffle.
14820   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
14821       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
14822     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
14823 
14824     // Don't try to fold splats; they're likely to simplify somehow, or they
14825     // might be free.
14826     if (OtherSV->isSplat())
14827       return SDValue();
14828 
14829     // The incoming shuffle must be of the same type as the result of the
14830     // current shuffle.
14831     assert(OtherSV->getOperand(0).getValueType() == VT &&
14832            "Shuffle types don't match");
14833 
14834     SDValue SV0, SV1;
14835     SmallVector<int, 4> Mask;
14836     // Compute the combined shuffle mask for a shuffle with SV0 as the first
14837     // operand, and SV1 as the second operand.
14838     for (unsigned i = 0; i != NumElts; ++i) {
14839       int Idx = SVN->getMaskElt(i);
14840       if (Idx < 0) {
14841         // Propagate Undef.
14842         Mask.push_back(Idx);
14843         continue;
14844       }
14845 
14846       SDValue CurrentVec;
14847       if (Idx < (int)NumElts) {
14848         // This shuffle index refers to the inner shuffle N0. Lookup the inner
14849         // shuffle mask to identify which vector is actually referenced.
14850         Idx = OtherSV->getMaskElt(Idx);
14851         if (Idx < 0) {
14852           // Propagate Undef.
14853           Mask.push_back(Idx);
14854           continue;
14855         }
14856 
14857         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
14858                                            : OtherSV->getOperand(1);
14859       } else {
14860         // This shuffle index references an element within N1.
14861         CurrentVec = N1;
14862       }
14863 
14864       // Simple case where 'CurrentVec' is UNDEF.
14865       if (CurrentVec.isUndef()) {
14866         Mask.push_back(-1);
14867         continue;
14868       }
14869 
14870       // Canonicalize the shuffle index. We don't know yet if CurrentVec
14871       // will be the first or second operand of the combined shuffle.
14872       Idx = Idx % NumElts;
14873       if (!SV0.getNode() || SV0 == CurrentVec) {
14874         // Ok. CurrentVec is the left hand side.
14875         // Update the mask accordingly.
14876         SV0 = CurrentVec;
14877         Mask.push_back(Idx);
14878         continue;
14879       }
14880 
14881       // Bail out if we cannot convert the shuffle pair into a single shuffle.
14882       if (SV1.getNode() && SV1 != CurrentVec)
14883         return SDValue();
14884 
14885       // Ok. CurrentVec is the right hand side.
14886       // Update the mask accordingly.
14887       SV1 = CurrentVec;
14888       Mask.push_back(Idx + NumElts);
14889     }
14890 
14891     // Check if all indices in Mask are Undef. In case, propagate Undef.
14892     bool isUndefMask = true;
14893     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
14894       isUndefMask &= Mask[i] < 0;
14895 
14896     if (isUndefMask)
14897       return DAG.getUNDEF(VT);
14898 
14899     if (!SV0.getNode())
14900       SV0 = DAG.getUNDEF(VT);
14901     if (!SV1.getNode())
14902       SV1 = DAG.getUNDEF(VT);
14903 
14904     // Avoid introducing shuffles with illegal mask.
14905     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
14906       ShuffleVectorSDNode::commuteMask(Mask);
14907 
14908       if (!TLI.isShuffleMaskLegal(Mask, VT))
14909         return SDValue();
14910 
14911       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
14912       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
14913       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
14914       std::swap(SV0, SV1);
14915     }
14916 
14917     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
14918     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
14919     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
14920     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
14921   }
14922 
14923   return SDValue();
14924 }
14925 
14926 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
14927   SDValue InVal = N->getOperand(0);
14928   EVT VT = N->getValueType(0);
14929 
14930   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
14931   // with a VECTOR_SHUFFLE.
14932   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14933     SDValue InVec = InVal->getOperand(0);
14934     SDValue EltNo = InVal->getOperand(1);
14935 
14936     // FIXME: We could support implicit truncation if the shuffle can be
14937     // scaled to a smaller vector scalar type.
14938     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
14939     if (C0 && VT == InVec.getValueType() &&
14940         VT.getScalarType() == InVal.getValueType()) {
14941       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
14942       int Elt = C0->getZExtValue();
14943       NewMask[0] = Elt;
14944 
14945       if (TLI.isShuffleMaskLegal(NewMask, VT))
14946         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
14947                                     NewMask);
14948     }
14949   }
14950 
14951   return SDValue();
14952 }
14953 
14954 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
14955   EVT VT = N->getValueType(0);
14956   SDValue N0 = N->getOperand(0);
14957   SDValue N1 = N->getOperand(1);
14958   SDValue N2 = N->getOperand(2);
14959 
14960   // If inserting an UNDEF, just return the original vector.
14961   if (N1.isUndef())
14962     return N0;
14963 
14964   // If this is an insert of an extracted vector into an undef vector, we can
14965   // just use the input to the extract.
14966   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
14967       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
14968     return N1.getOperand(0);
14969 
14970   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
14971   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
14972   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
14973   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
14974       N0.getOperand(1).getValueType() == N1.getValueType() &&
14975       N0.getOperand(2) == N2)
14976     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
14977                        N1, N2);
14978 
14979   if (!isa<ConstantSDNode>(N2))
14980     return SDValue();
14981 
14982   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
14983 
14984   // Canonicalize insert_subvector dag nodes.
14985   // Example:
14986   // (insert_subvector (insert_subvector A, Idx0), Idx1)
14987   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
14988   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
14989       N1.getValueType() == N0.getOperand(1).getValueType() &&
14990       isa<ConstantSDNode>(N0.getOperand(2))) {
14991     unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
14992     if (InsIdx < OtherIdx) {
14993       // Swap nodes.
14994       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
14995                                   N0.getOperand(0), N1, N2);
14996       AddToWorklist(NewOp.getNode());
14997       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
14998                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
14999     }
15000   }
15001 
15002   // If the input vector is a concatenation, and the insert replaces
15003   // one of the pieces, we can optimize into a single concat_vectors.
15004   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
15005       N0.getOperand(0).getValueType() == N1.getValueType()) {
15006     unsigned Factor = N1.getValueType().getVectorNumElements();
15007 
15008     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
15009     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
15010 
15011     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15012   }
15013 
15014   return SDValue();
15015 }
15016 
15017 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
15018   SDValue N0 = N->getOperand(0);
15019 
15020   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
15021   if (N0->getOpcode() == ISD::FP16_TO_FP)
15022     return N0->getOperand(0);
15023 
15024   return SDValue();
15025 }
15026 
15027 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
15028   SDValue N0 = N->getOperand(0);
15029 
15030   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
15031   if (N0->getOpcode() == ISD::AND) {
15032     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
15033     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
15034       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
15035                          N0.getOperand(0));
15036     }
15037   }
15038 
15039   return SDValue();
15040 }
15041 
15042 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
15043 /// with the destination vector and a zero vector.
15044 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
15045 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
15046 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
15047   EVT VT = N->getValueType(0);
15048   SDValue LHS = N->getOperand(0);
15049   SDValue RHS = N->getOperand(1);
15050   SDLoc DL(N);
15051 
15052   // Make sure we're not running after operation legalization where it
15053   // may have custom lowered the vector shuffles.
15054   if (LegalOperations)
15055     return SDValue();
15056 
15057   if (N->getOpcode() != ISD::AND)
15058     return SDValue();
15059 
15060   if (RHS.getOpcode() == ISD::BITCAST)
15061     RHS = RHS.getOperand(0);
15062 
15063   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
15064     return SDValue();
15065 
15066   EVT RVT = RHS.getValueType();
15067   unsigned NumElts = RHS.getNumOperands();
15068 
15069   // Attempt to create a valid clear mask, splitting the mask into
15070   // sub elements and checking to see if each is
15071   // all zeros or all ones - suitable for shuffle masking.
15072   auto BuildClearMask = [&](int Split) {
15073     int NumSubElts = NumElts * Split;
15074     int NumSubBits = RVT.getScalarSizeInBits() / Split;
15075 
15076     SmallVector<int, 8> Indices;
15077     for (int i = 0; i != NumSubElts; ++i) {
15078       int EltIdx = i / Split;
15079       int SubIdx = i % Split;
15080       SDValue Elt = RHS.getOperand(EltIdx);
15081       if (Elt.isUndef()) {
15082         Indices.push_back(-1);
15083         continue;
15084       }
15085 
15086       APInt Bits;
15087       if (isa<ConstantSDNode>(Elt))
15088         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
15089       else if (isa<ConstantFPSDNode>(Elt))
15090         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
15091       else
15092         return SDValue();
15093 
15094       // Extract the sub element from the constant bit mask.
15095       if (DAG.getDataLayout().isBigEndian()) {
15096         Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
15097       } else {
15098         Bits = Bits.lshr(SubIdx * NumSubBits);
15099       }
15100 
15101       if (Split > 1)
15102         Bits = Bits.trunc(NumSubBits);
15103 
15104       if (Bits.isAllOnesValue())
15105         Indices.push_back(i);
15106       else if (Bits == 0)
15107         Indices.push_back(i + NumSubElts);
15108       else
15109         return SDValue();
15110     }
15111 
15112     // Let's see if the target supports this vector_shuffle.
15113     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
15114     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
15115     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
15116       return SDValue();
15117 
15118     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
15119     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
15120                                                    DAG.getBitcast(ClearVT, LHS),
15121                                                    Zero, Indices));
15122   };
15123 
15124   // Determine maximum split level (byte level masking).
15125   int MaxSplit = 1;
15126   if (RVT.getScalarSizeInBits() % 8 == 0)
15127     MaxSplit = RVT.getScalarSizeInBits() / 8;
15128 
15129   for (int Split = 1; Split <= MaxSplit; ++Split)
15130     if (RVT.getScalarSizeInBits() % Split == 0)
15131       if (SDValue S = BuildClearMask(Split))
15132         return S;
15133 
15134   return SDValue();
15135 }
15136 
15137 /// Visit a binary vector operation, like ADD.
15138 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
15139   assert(N->getValueType(0).isVector() &&
15140          "SimplifyVBinOp only works on vectors!");
15141 
15142   SDValue LHS = N->getOperand(0);
15143   SDValue RHS = N->getOperand(1);
15144   SDValue Ops[] = {LHS, RHS};
15145 
15146   // See if we can constant fold the vector operation.
15147   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
15148           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
15149     return Fold;
15150 
15151   // Try to convert a constant mask AND into a shuffle clear mask.
15152   if (SDValue Shuffle = XformToShuffleWithZero(N))
15153     return Shuffle;
15154 
15155   // Type legalization might introduce new shuffles in the DAG.
15156   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
15157   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
15158   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
15159       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
15160       LHS.getOperand(1).isUndef() &&
15161       RHS.getOperand(1).isUndef()) {
15162     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
15163     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
15164 
15165     if (SVN0->getMask().equals(SVN1->getMask())) {
15166       EVT VT = N->getValueType(0);
15167       SDValue UndefVector = LHS.getOperand(1);
15168       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
15169                                      LHS.getOperand(0), RHS.getOperand(0),
15170                                      N->getFlags());
15171       AddUsersToWorklist(N);
15172       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
15173                                   SVN0->getMask());
15174     }
15175   }
15176 
15177   return SDValue();
15178 }
15179 
15180 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
15181                                     SDValue N2) {
15182   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
15183 
15184   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
15185                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15186 
15187   // If we got a simplified select_cc node back from SimplifySelectCC, then
15188   // break it down into a new SETCC node, and a new SELECT node, and then return
15189   // the SELECT node, since we were called with a SELECT node.
15190   if (SCC.getNode()) {
15191     // Check to see if we got a select_cc back (to turn into setcc/select).
15192     // Otherwise, just return whatever node we got back, like fabs.
15193     if (SCC.getOpcode() == ISD::SELECT_CC) {
15194       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
15195                                   N0.getValueType(),
15196                                   SCC.getOperand(0), SCC.getOperand(1),
15197                                   SCC.getOperand(4));
15198       AddToWorklist(SETCC.getNode());
15199       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
15200                            SCC.getOperand(2), SCC.getOperand(3));
15201     }
15202 
15203     return SCC;
15204   }
15205   return SDValue();
15206 }
15207 
15208 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
15209 /// being selected between, see if we can simplify the select.  Callers of this
15210 /// should assume that TheSelect is deleted if this returns true.  As such, they
15211 /// should return the appropriate thing (e.g. the node) back to the top-level of
15212 /// the DAG combiner loop to avoid it being looked at.
15213 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
15214                                     SDValue RHS) {
15215 
15216   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15217   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
15218   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
15219     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
15220       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
15221       SDValue Sqrt = RHS;
15222       ISD::CondCode CC;
15223       SDValue CmpLHS;
15224       const ConstantFPSDNode *Zero = nullptr;
15225 
15226       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
15227         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
15228         CmpLHS = TheSelect->getOperand(0);
15229         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
15230       } else {
15231         // SELECT or VSELECT
15232         SDValue Cmp = TheSelect->getOperand(0);
15233         if (Cmp.getOpcode() == ISD::SETCC) {
15234           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
15235           CmpLHS = Cmp.getOperand(0);
15236           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
15237         }
15238       }
15239       if (Zero && Zero->isZero() &&
15240           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
15241           CC == ISD::SETULT || CC == ISD::SETLT)) {
15242         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15243         CombineTo(TheSelect, Sqrt);
15244         return true;
15245       }
15246     }
15247   }
15248   // Cannot simplify select with vector condition
15249   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
15250 
15251   // If this is a select from two identical things, try to pull the operation
15252   // through the select.
15253   if (LHS.getOpcode() != RHS.getOpcode() ||
15254       !LHS.hasOneUse() || !RHS.hasOneUse())
15255     return false;
15256 
15257   // If this is a load and the token chain is identical, replace the select
15258   // of two loads with a load through a select of the address to load from.
15259   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
15260   // constants have been dropped into the constant pool.
15261   if (LHS.getOpcode() == ISD::LOAD) {
15262     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
15263     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
15264 
15265     // Token chains must be identical.
15266     if (LHS.getOperand(0) != RHS.getOperand(0) ||
15267         // Do not let this transformation reduce the number of volatile loads.
15268         LLD->isVolatile() || RLD->isVolatile() ||
15269         // FIXME: If either is a pre/post inc/dec load,
15270         // we'd need to split out the address adjustment.
15271         LLD->isIndexed() || RLD->isIndexed() ||
15272         // If this is an EXTLOAD, the VT's must match.
15273         LLD->getMemoryVT() != RLD->getMemoryVT() ||
15274         // If this is an EXTLOAD, the kind of extension must match.
15275         (LLD->getExtensionType() != RLD->getExtensionType() &&
15276          // The only exception is if one of the extensions is anyext.
15277          LLD->getExtensionType() != ISD::EXTLOAD &&
15278          RLD->getExtensionType() != ISD::EXTLOAD) ||
15279         // FIXME: this discards src value information.  This is
15280         // over-conservative. It would be beneficial to be able to remember
15281         // both potential memory locations.  Since we are discarding
15282         // src value info, don't do the transformation if the memory
15283         // locations are not in the default address space.
15284         LLD->getPointerInfo().getAddrSpace() != 0 ||
15285         RLD->getPointerInfo().getAddrSpace() != 0 ||
15286         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
15287                                       LLD->getBasePtr().getValueType()))
15288       return false;
15289 
15290     // Check that the select condition doesn't reach either load.  If so,
15291     // folding this will induce a cycle into the DAG.  If not, this is safe to
15292     // xform, so create a select of the addresses.
15293     SDValue Addr;
15294     if (TheSelect->getOpcode() == ISD::SELECT) {
15295       SDNode *CondNode = TheSelect->getOperand(0).getNode();
15296       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
15297           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
15298         return false;
15299       // The loads must not depend on one another.
15300       if (LLD->isPredecessorOf(RLD) ||
15301           RLD->isPredecessorOf(LLD))
15302         return false;
15303       Addr = DAG.getSelect(SDLoc(TheSelect),
15304                            LLD->getBasePtr().getValueType(),
15305                            TheSelect->getOperand(0), LLD->getBasePtr(),
15306                            RLD->getBasePtr());
15307     } else {  // Otherwise SELECT_CC
15308       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
15309       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
15310 
15311       if ((LLD->hasAnyUseOfValue(1) &&
15312            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
15313           (RLD->hasAnyUseOfValue(1) &&
15314            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
15315         return false;
15316 
15317       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
15318                          LLD->getBasePtr().getValueType(),
15319                          TheSelect->getOperand(0),
15320                          TheSelect->getOperand(1),
15321                          LLD->getBasePtr(), RLD->getBasePtr(),
15322                          TheSelect->getOperand(4));
15323     }
15324 
15325     SDValue Load;
15326     // It is safe to replace the two loads if they have different alignments,
15327     // but the new load must be the minimum (most restrictive) alignment of the
15328     // inputs.
15329     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
15330     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
15331     if (!RLD->isInvariant())
15332       MMOFlags &= ~MachineMemOperand::MOInvariant;
15333     if (!RLD->isDereferenceable())
15334       MMOFlags &= ~MachineMemOperand::MODereferenceable;
15335     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
15336       // FIXME: Discards pointer and AA info.
15337       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
15338                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
15339                          MMOFlags);
15340     } else {
15341       // FIXME: Discards pointer and AA info.
15342       Load = DAG.getExtLoad(
15343           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
15344                                                   : LLD->getExtensionType(),
15345           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
15346           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
15347     }
15348 
15349     // Users of the select now use the result of the load.
15350     CombineTo(TheSelect, Load);
15351 
15352     // Users of the old loads now use the new load's chain.  We know the
15353     // old-load value is dead now.
15354     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
15355     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
15356     return true;
15357   }
15358 
15359   return false;
15360 }
15361 
15362 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
15363 /// bitwise 'and'.
15364 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
15365                                             SDValue N1, SDValue N2, SDValue N3,
15366                                             ISD::CondCode CC) {
15367   // If this is a select where the false operand is zero and the compare is a
15368   // check of the sign bit, see if we can perform the "gzip trick":
15369   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
15370   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
15371   EVT XType = N0.getValueType();
15372   EVT AType = N2.getValueType();
15373   if (!isNullConstant(N3) || !XType.bitsGE(AType))
15374     return SDValue();
15375 
15376   // If the comparison is testing for a positive value, we have to invert
15377   // the sign bit mask, so only do that transform if the target has a bitwise
15378   // 'and not' instruction (the invert is free).
15379   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
15380     // (X > -1) ? A : 0
15381     // (X >  0) ? X : 0 <-- This is canonical signed max.
15382     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
15383       return SDValue();
15384   } else if (CC == ISD::SETLT) {
15385     // (X <  0) ? A : 0
15386     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
15387     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
15388       return SDValue();
15389   } else {
15390     return SDValue();
15391   }
15392 
15393   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
15394   // constant.
15395   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
15396   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
15397   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
15398     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
15399     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
15400     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
15401     AddToWorklist(Shift.getNode());
15402 
15403     if (XType.bitsGT(AType)) {
15404       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15405       AddToWorklist(Shift.getNode());
15406     }
15407 
15408     if (CC == ISD::SETGT)
15409       Shift = DAG.getNOT(DL, Shift, AType);
15410 
15411     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
15412   }
15413 
15414   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
15415   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
15416   AddToWorklist(Shift.getNode());
15417 
15418   if (XType.bitsGT(AType)) {
15419     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15420     AddToWorklist(Shift.getNode());
15421   }
15422 
15423   if (CC == ISD::SETGT)
15424     Shift = DAG.getNOT(DL, Shift, AType);
15425 
15426   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
15427 }
15428 
15429 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
15430 /// where 'cond' is the comparison specified by CC.
15431 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
15432                                       SDValue N2, SDValue N3, ISD::CondCode CC,
15433                                       bool NotExtCompare) {
15434   // (x ? y : y) -> y.
15435   if (N2 == N3) return N2;
15436 
15437   EVT VT = N2.getValueType();
15438   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
15439   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
15440 
15441   // Determine if the condition we're dealing with is constant
15442   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
15443                               N0, N1, CC, DL, false);
15444   if (SCC.getNode()) AddToWorklist(SCC.getNode());
15445 
15446   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
15447     // fold select_cc true, x, y -> x
15448     // fold select_cc false, x, y -> y
15449     return !SCCC->isNullValue() ? N2 : N3;
15450   }
15451 
15452   // Check to see if we can simplify the select into an fabs node
15453   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
15454     // Allow either -0.0 or 0.0
15455     if (CFP->isZero()) {
15456       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
15457       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
15458           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
15459           N2 == N3.getOperand(0))
15460         return DAG.getNode(ISD::FABS, DL, VT, N0);
15461 
15462       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
15463       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
15464           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
15465           N2.getOperand(0) == N3)
15466         return DAG.getNode(ISD::FABS, DL, VT, N3);
15467     }
15468   }
15469 
15470   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
15471   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
15472   // in it.  This is a win when the constant is not otherwise available because
15473   // it replaces two constant pool loads with one.  We only do this if the FP
15474   // type is known to be legal, because if it isn't, then we are before legalize
15475   // types an we want the other legalization to happen first (e.g. to avoid
15476   // messing with soft float) and if the ConstantFP is not legal, because if
15477   // it is legal, we may not need to store the FP constant in a constant pool.
15478   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
15479     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
15480       if (TLI.isTypeLegal(N2.getValueType()) &&
15481           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
15482                TargetLowering::Legal &&
15483            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
15484            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
15485           // If both constants have multiple uses, then we won't need to do an
15486           // extra load, they are likely around in registers for other users.
15487           (TV->hasOneUse() || FV->hasOneUse())) {
15488         Constant *Elts[] = {
15489           const_cast<ConstantFP*>(FV->getConstantFPValue()),
15490           const_cast<ConstantFP*>(TV->getConstantFPValue())
15491         };
15492         Type *FPTy = Elts[0]->getType();
15493         const DataLayout &TD = DAG.getDataLayout();
15494 
15495         // Create a ConstantArray of the two constants.
15496         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
15497         SDValue CPIdx =
15498             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
15499                                 TD.getPrefTypeAlignment(FPTy));
15500         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
15501 
15502         // Get the offsets to the 0 and 1 element of the array so that we can
15503         // select between them.
15504         SDValue Zero = DAG.getIntPtrConstant(0, DL);
15505         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
15506         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
15507 
15508         SDValue Cond = DAG.getSetCC(DL,
15509                                     getSetCCResultType(N0.getValueType()),
15510                                     N0, N1, CC);
15511         AddToWorklist(Cond.getNode());
15512         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
15513                                           Cond, One, Zero);
15514         AddToWorklist(CstOffset.getNode());
15515         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
15516                             CstOffset);
15517         AddToWorklist(CPIdx.getNode());
15518         return DAG.getLoad(
15519             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
15520             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
15521             Alignment);
15522       }
15523     }
15524 
15525   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
15526     return V;
15527 
15528   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
15529   // where y is has a single bit set.
15530   // A plaintext description would be, we can turn the SELECT_CC into an AND
15531   // when the condition can be materialized as an all-ones register.  Any
15532   // single bit-test can be materialized as an all-ones register with
15533   // shift-left and shift-right-arith.
15534   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
15535       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
15536     SDValue AndLHS = N0->getOperand(0);
15537     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15538     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
15539       // Shift the tested bit over the sign bit.
15540       const APInt &AndMask = ConstAndRHS->getAPIntValue();
15541       SDValue ShlAmt =
15542         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
15543                         getShiftAmountTy(AndLHS.getValueType()));
15544       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
15545 
15546       // Now arithmetic right shift it all the way over, so the result is either
15547       // all-ones, or zero.
15548       SDValue ShrAmt =
15549         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
15550                         getShiftAmountTy(Shl.getValueType()));
15551       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
15552 
15553       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
15554     }
15555   }
15556 
15557   // fold select C, 16, 0 -> shl C, 4
15558   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
15559       TLI.getBooleanContents(N0.getValueType()) ==
15560           TargetLowering::ZeroOrOneBooleanContent) {
15561 
15562     // If the caller doesn't want us to simplify this into a zext of a compare,
15563     // don't do it.
15564     if (NotExtCompare && N2C->isOne())
15565       return SDValue();
15566 
15567     // Get a SetCC of the condition
15568     // NOTE: Don't create a SETCC if it's not legal on this target.
15569     if (!LegalOperations ||
15570         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
15571       SDValue Temp, SCC;
15572       // cast from setcc result type to select result type
15573       if (LegalTypes) {
15574         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
15575                             N0, N1, CC);
15576         if (N2.getValueType().bitsLT(SCC.getValueType()))
15577           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
15578                                         N2.getValueType());
15579         else
15580           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
15581                              N2.getValueType(), SCC);
15582       } else {
15583         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
15584         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
15585                            N2.getValueType(), SCC);
15586       }
15587 
15588       AddToWorklist(SCC.getNode());
15589       AddToWorklist(Temp.getNode());
15590 
15591       if (N2C->isOne())
15592         return Temp;
15593 
15594       // shl setcc result by log2 n2c
15595       return DAG.getNode(
15596           ISD::SHL, DL, N2.getValueType(), Temp,
15597           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
15598                           getShiftAmountTy(Temp.getValueType())));
15599     }
15600   }
15601 
15602   // Check to see if this is an integer abs.
15603   // select_cc setg[te] X,  0,  X, -X ->
15604   // select_cc setgt    X, -1,  X, -X ->
15605   // select_cc setl[te] X,  0, -X,  X ->
15606   // select_cc setlt    X,  1, -X,  X ->
15607   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
15608   if (N1C) {
15609     ConstantSDNode *SubC = nullptr;
15610     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
15611          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
15612         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
15613       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
15614     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
15615               (N1C->isOne() && CC == ISD::SETLT)) &&
15616              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
15617       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
15618 
15619     EVT XType = N0.getValueType();
15620     if (SubC && SubC->isNullValue() && XType.isInteger()) {
15621       SDLoc DL(N0);
15622       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
15623                                   N0,
15624                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
15625                                          getShiftAmountTy(N0.getValueType())));
15626       SDValue Add = DAG.getNode(ISD::ADD, DL,
15627                                 XType, N0, Shift);
15628       AddToWorklist(Shift.getNode());
15629       AddToWorklist(Add.getNode());
15630       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
15631     }
15632   }
15633 
15634   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
15635   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
15636   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
15637   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
15638   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
15639   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
15640   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
15641   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
15642   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
15643     SDValue ValueOnZero = N2;
15644     SDValue Count = N3;
15645     // If the condition is NE instead of E, swap the operands.
15646     if (CC == ISD::SETNE)
15647       std::swap(ValueOnZero, Count);
15648     // Check if the value on zero is a constant equal to the bits in the type.
15649     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
15650       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
15651         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
15652         // legal, combine to just cttz.
15653         if ((Count.getOpcode() == ISD::CTTZ ||
15654              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
15655             N0 == Count.getOperand(0) &&
15656             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
15657           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
15658         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
15659         // legal, combine to just ctlz.
15660         if ((Count.getOpcode() == ISD::CTLZ ||
15661              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
15662             N0 == Count.getOperand(0) &&
15663             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
15664           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
15665       }
15666     }
15667   }
15668 
15669   return SDValue();
15670 }
15671 
15672 /// This is a stub for TargetLowering::SimplifySetCC.
15673 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
15674                                    ISD::CondCode Cond, const SDLoc &DL,
15675                                    bool foldBooleans) {
15676   TargetLowering::DAGCombinerInfo
15677     DagCombineInfo(DAG, Level, false, this);
15678   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
15679 }
15680 
15681 /// Given an ISD::SDIV node expressing a divide by constant, return
15682 /// a DAG expression to select that will generate the same value by multiplying
15683 /// by a magic number.
15684 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
15685 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
15686   // when optimising for minimum size, we don't want to expand a div to a mul
15687   // and a shift.
15688   if (DAG.getMachineFunction().getFunction()->optForMinSize())
15689     return SDValue();
15690 
15691   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
15692   if (!C)
15693     return SDValue();
15694 
15695   // Avoid division by zero.
15696   if (C->isNullValue())
15697     return SDValue();
15698 
15699   std::vector<SDNode*> Built;
15700   SDValue S =
15701       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
15702 
15703   for (SDNode *N : Built)
15704     AddToWorklist(N);
15705   return S;
15706 }
15707 
15708 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
15709 /// DAG expression that will generate the same value by right shifting.
15710 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
15711   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
15712   if (!C)
15713     return SDValue();
15714 
15715   // Avoid division by zero.
15716   if (C->isNullValue())
15717     return SDValue();
15718 
15719   std::vector<SDNode *> Built;
15720   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
15721 
15722   for (SDNode *N : Built)
15723     AddToWorklist(N);
15724   return S;
15725 }
15726 
15727 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
15728 /// expression that will generate the same value by multiplying by a magic
15729 /// number.
15730 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
15731 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
15732   // when optimising for minimum size, we don't want to expand a div to a mul
15733   // and a shift.
15734   if (DAG.getMachineFunction().getFunction()->optForMinSize())
15735     return SDValue();
15736 
15737   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
15738   if (!C)
15739     return SDValue();
15740 
15741   // Avoid division by zero.
15742   if (C->isNullValue())
15743     return SDValue();
15744 
15745   std::vector<SDNode*> Built;
15746   SDValue S =
15747       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
15748 
15749   for (SDNode *N : Built)
15750     AddToWorklist(N);
15751   return S;
15752 }
15753 
15754 /// Determines the LogBase2 value for a non-null input value using the
15755 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
15756 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
15757   EVT VT = V.getValueType();
15758   unsigned EltBits = VT.getScalarSizeInBits();
15759   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
15760   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
15761   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
15762   return LogBase2;
15763 }
15764 
15765 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15766 /// For the reciprocal, we need to find the zero of the function:
15767 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
15768 ///     =>
15769 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
15770 ///     does not require additional intermediate precision]
15771 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
15772   if (Level >= AfterLegalizeDAG)
15773     return SDValue();
15774 
15775   // TODO: Handle half and/or extended types?
15776   EVT VT = Op.getValueType();
15777   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
15778     return SDValue();
15779 
15780   // If estimates are explicitly disabled for this function, we're done.
15781   MachineFunction &MF = DAG.getMachineFunction();
15782   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
15783   if (Enabled == TLI.ReciprocalEstimate::Disabled)
15784     return SDValue();
15785 
15786   // Estimates may be explicitly enabled for this type with a custom number of
15787   // refinement steps.
15788   int Iterations = TLI.getDivRefinementSteps(VT, MF);
15789   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
15790     AddToWorklist(Est.getNode());
15791 
15792     if (Iterations) {
15793       EVT VT = Op.getValueType();
15794       SDLoc DL(Op);
15795       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
15796 
15797       // Newton iterations: Est = Est + Est (1 - Arg * Est)
15798       for (int i = 0; i < Iterations; ++i) {
15799         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
15800         AddToWorklist(NewEst.getNode());
15801 
15802         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
15803         AddToWorklist(NewEst.getNode());
15804 
15805         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
15806         AddToWorklist(NewEst.getNode());
15807 
15808         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
15809         AddToWorklist(Est.getNode());
15810       }
15811     }
15812     return Est;
15813   }
15814 
15815   return SDValue();
15816 }
15817 
15818 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15819 /// For the reciprocal sqrt, we need to find the zero of the function:
15820 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
15821 ///     =>
15822 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
15823 /// As a result, we precompute A/2 prior to the iteration loop.
15824 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
15825                                          unsigned Iterations,
15826                                          SDNodeFlags *Flags, bool Reciprocal) {
15827   EVT VT = Arg.getValueType();
15828   SDLoc DL(Arg);
15829   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
15830 
15831   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
15832   // this entire sequence requires only one FP constant.
15833   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
15834   AddToWorklist(HalfArg.getNode());
15835 
15836   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
15837   AddToWorklist(HalfArg.getNode());
15838 
15839   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
15840   for (unsigned i = 0; i < Iterations; ++i) {
15841     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
15842     AddToWorklist(NewEst.getNode());
15843 
15844     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
15845     AddToWorklist(NewEst.getNode());
15846 
15847     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
15848     AddToWorklist(NewEst.getNode());
15849 
15850     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
15851     AddToWorklist(Est.getNode());
15852   }
15853 
15854   // If non-reciprocal square root is requested, multiply the result by Arg.
15855   if (!Reciprocal) {
15856     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
15857     AddToWorklist(Est.getNode());
15858   }
15859 
15860   return Est;
15861 }
15862 
15863 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15864 /// For the reciprocal sqrt, we need to find the zero of the function:
15865 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
15866 ///     =>
15867 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
15868 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
15869                                          unsigned Iterations,
15870                                          SDNodeFlags *Flags, bool Reciprocal) {
15871   EVT VT = Arg.getValueType();
15872   SDLoc DL(Arg);
15873   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
15874   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
15875 
15876   // This routine must enter the loop below to work correctly
15877   // when (Reciprocal == false).
15878   assert(Iterations > 0);
15879 
15880   // Newton iterations for reciprocal square root:
15881   // E = (E * -0.5) * ((A * E) * E + -3.0)
15882   for (unsigned i = 0; i < Iterations; ++i) {
15883     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
15884     AddToWorklist(AE.getNode());
15885 
15886     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
15887     AddToWorklist(AEE.getNode());
15888 
15889     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
15890     AddToWorklist(RHS.getNode());
15891 
15892     // When calculating a square root at the last iteration build:
15893     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
15894     // (notice a common subexpression)
15895     SDValue LHS;
15896     if (Reciprocal || (i + 1) < Iterations) {
15897       // RSQRT: LHS = (E * -0.5)
15898       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
15899     } else {
15900       // SQRT: LHS = (A * E) * -0.5
15901       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
15902     }
15903     AddToWorklist(LHS.getNode());
15904 
15905     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
15906     AddToWorklist(Est.getNode());
15907   }
15908 
15909   return Est;
15910 }
15911 
15912 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
15913 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
15914 /// Op can be zero.
15915 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
15916                                            bool Reciprocal) {
15917   if (Level >= AfterLegalizeDAG)
15918     return SDValue();
15919 
15920   // TODO: Handle half and/or extended types?
15921   EVT VT = Op.getValueType();
15922   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
15923     return SDValue();
15924 
15925   // If estimates are explicitly disabled for this function, we're done.
15926   MachineFunction &MF = DAG.getMachineFunction();
15927   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
15928   if (Enabled == TLI.ReciprocalEstimate::Disabled)
15929     return SDValue();
15930 
15931   // Estimates may be explicitly enabled for this type with a custom number of
15932   // refinement steps.
15933   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
15934 
15935   bool UseOneConstNR = false;
15936   if (SDValue Est =
15937       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
15938                           Reciprocal)) {
15939     AddToWorklist(Est.getNode());
15940 
15941     if (Iterations) {
15942       Est = UseOneConstNR
15943             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
15944             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
15945 
15946       if (!Reciprocal) {
15947         // Unfortunately, Est is now NaN if the input was exactly 0.0.
15948         // Select out this case and force the answer to 0.0.
15949         EVT VT = Op.getValueType();
15950         SDLoc DL(Op);
15951 
15952         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
15953         EVT CCVT = getSetCCResultType(VT);
15954         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
15955         AddToWorklist(ZeroCmp.getNode());
15956 
15957         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
15958                           ZeroCmp, FPZero, Est);
15959         AddToWorklist(Est.getNode());
15960       }
15961     }
15962     return Est;
15963   }
15964 
15965   return SDValue();
15966 }
15967 
15968 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15969   return buildSqrtEstimateImpl(Op, Flags, true);
15970 }
15971 
15972 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15973   return buildSqrtEstimateImpl(Op, Flags, false);
15974 }
15975 
15976 /// Return true if base is a frame index, which is known not to alias with
15977 /// anything but itself.  Provides base object and offset as results.
15978 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
15979                            const GlobalValue *&GV, const void *&CV) {
15980   // Assume it is a primitive operation.
15981   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
15982 
15983   // If it's an adding a simple constant then integrate the offset.
15984   if (Base.getOpcode() == ISD::ADD) {
15985     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
15986       Base = Base.getOperand(0);
15987       Offset += C->getSExtValue();
15988     }
15989   }
15990 
15991   // Return the underlying GlobalValue, and update the Offset.  Return false
15992   // for GlobalAddressSDNode since the same GlobalAddress may be represented
15993   // by multiple nodes with different offsets.
15994   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
15995     GV = G->getGlobal();
15996     Offset += G->getOffset();
15997     return false;
15998   }
15999 
16000   // Return the underlying Constant value, and update the Offset.  Return false
16001   // for ConstantSDNodes since the same constant pool entry may be represented
16002   // by multiple nodes with different offsets.
16003   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
16004     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
16005                                          : (const void *)C->getConstVal();
16006     Offset += C->getOffset();
16007     return false;
16008   }
16009   // If it's any of the following then it can't alias with anything but itself.
16010   return isa<FrameIndexSDNode>(Base);
16011 }
16012 
16013 /// Return true if there is any possibility that the two addresses overlap.
16014 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
16015   // If they are the same then they must be aliases.
16016   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
16017 
16018   // If they are both volatile then they cannot be reordered.
16019   if (Op0->isVolatile() && Op1->isVolatile()) return true;
16020 
16021   // If one operation reads from invariant memory, and the other may store, they
16022   // cannot alias. These should really be checking the equivalent of mayWrite,
16023   // but it only matters for memory nodes other than load /store.
16024   if (Op0->isInvariant() && Op1->writeMem())
16025     return false;
16026 
16027   if (Op1->isInvariant() && Op0->writeMem())
16028     return false;
16029 
16030   // Gather base node and offset information.
16031   SDValue Base1, Base2;
16032   int64_t Offset1, Offset2;
16033   const GlobalValue *GV1, *GV2;
16034   const void *CV1, *CV2;
16035   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
16036                                       Base1, Offset1, GV1, CV1);
16037   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
16038                                       Base2, Offset2, GV2, CV2);
16039 
16040   // If they have a same base address then check to see if they overlap.
16041   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
16042     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
16043              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
16044 
16045   // It is possible for different frame indices to alias each other, mostly
16046   // when tail call optimization reuses return address slots for arguments.
16047   // To catch this case, look up the actual index of frame indices to compute
16048   // the real alias relationship.
16049   if (isFrameIndex1 && isFrameIndex2) {
16050     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
16051     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
16052     Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
16053     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
16054              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
16055   }
16056 
16057   // Otherwise, if we know what the bases are, and they aren't identical, then
16058   // we know they cannot alias.
16059   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
16060     return false;
16061 
16062   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
16063   // compared to the size and offset of the access, we may be able to prove they
16064   // do not alias.  This check is conservative for now to catch cases created by
16065   // splitting vector types.
16066   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
16067       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
16068       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
16069        Op1->getMemoryVT().getSizeInBits() >> 3) &&
16070       (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
16071     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
16072     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
16073 
16074     // There is no overlap between these relatively aligned accesses of similar
16075     // size, return no alias.
16076     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
16077         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
16078       return false;
16079   }
16080 
16081   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
16082                    ? CombinerGlobalAA
16083                    : DAG.getSubtarget().useAA();
16084 #ifndef NDEBUG
16085   if (CombinerAAOnlyFunc.getNumOccurrences() &&
16086       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
16087     UseAA = false;
16088 #endif
16089   if (UseAA &&
16090       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
16091     // Use alias analysis information.
16092     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
16093                                  Op1->getSrcValueOffset());
16094     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
16095         Op0->getSrcValueOffset() - MinOffset;
16096     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
16097         Op1->getSrcValueOffset() - MinOffset;
16098     AliasResult AAResult =
16099         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
16100                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
16101                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
16102                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
16103     if (AAResult == NoAlias)
16104       return false;
16105   }
16106 
16107   // Otherwise we have to assume they alias.
16108   return true;
16109 }
16110 
16111 /// Walk up chain skipping non-aliasing memory nodes,
16112 /// looking for aliasing nodes and adding them to the Aliases vector.
16113 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
16114                                    SmallVectorImpl<SDValue> &Aliases) {
16115   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
16116   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
16117 
16118   // Get alias information for node.
16119   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
16120 
16121   // Starting off.
16122   Chains.push_back(OriginalChain);
16123   unsigned Depth = 0;
16124 
16125   // Look at each chain and determine if it is an alias.  If so, add it to the
16126   // aliases list.  If not, then continue up the chain looking for the next
16127   // candidate.
16128   while (!Chains.empty()) {
16129     SDValue Chain = Chains.pop_back_val();
16130 
16131     // For TokenFactor nodes, look at each operand and only continue up the
16132     // chain until we reach the depth limit.
16133     //
16134     // FIXME: The depth check could be made to return the last non-aliasing
16135     // chain we found before we hit a tokenfactor rather than the original
16136     // chain.
16137     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
16138       Aliases.clear();
16139       Aliases.push_back(OriginalChain);
16140       return;
16141     }
16142 
16143     // Don't bother if we've been before.
16144     if (!Visited.insert(Chain.getNode()).second)
16145       continue;
16146 
16147     switch (Chain.getOpcode()) {
16148     case ISD::EntryToken:
16149       // Entry token is ideal chain operand, but handled in FindBetterChain.
16150       break;
16151 
16152     case ISD::LOAD:
16153     case ISD::STORE: {
16154       // Get alias information for Chain.
16155       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
16156           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
16157 
16158       // If chain is alias then stop here.
16159       if (!(IsLoad && IsOpLoad) &&
16160           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
16161         Aliases.push_back(Chain);
16162       } else {
16163         // Look further up the chain.
16164         Chains.push_back(Chain.getOperand(0));
16165         ++Depth;
16166       }
16167       break;
16168     }
16169 
16170     case ISD::TokenFactor:
16171       // We have to check each of the operands of the token factor for "small"
16172       // token factors, so we queue them up.  Adding the operands to the queue
16173       // (stack) in reverse order maintains the original order and increases the
16174       // likelihood that getNode will find a matching token factor (CSE.)
16175       if (Chain.getNumOperands() > 16) {
16176         Aliases.push_back(Chain);
16177         break;
16178       }
16179       for (unsigned n = Chain.getNumOperands(); n;)
16180         Chains.push_back(Chain.getOperand(--n));
16181       ++Depth;
16182       break;
16183 
16184     case ISD::CopyFromReg:
16185       // Forward past CopyFromReg.
16186       Chains.push_back(Chain.getOperand(0));
16187       ++Depth;
16188       break;
16189 
16190     default:
16191       // For all other instructions we will just have to take what we can get.
16192       Aliases.push_back(Chain);
16193       break;
16194     }
16195   }
16196 }
16197 
16198 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
16199 /// (aliasing node.)
16200 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
16201   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
16202 
16203   // Accumulate all the aliases to this node.
16204   GatherAllAliases(N, OldChain, Aliases);
16205 
16206   // If no operands then chain to entry token.
16207   if (Aliases.size() == 0)
16208     return DAG.getEntryNode();
16209 
16210   // If a single operand then chain to it.  We don't need to revisit it.
16211   if (Aliases.size() == 1)
16212     return Aliases[0];
16213 
16214   // Construct a custom tailored token factor.
16215   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
16216 }
16217 
16218 // This function tries to collect a bunch of potentially interesting
16219 // nodes to improve the chains of, all at once. This might seem
16220 // redundant, as this function gets called when visiting every store
16221 // node, so why not let the work be done on each store as it's visited?
16222 //
16223 // I believe this is mainly important because MergeConsecutiveStores
16224 // is unable to deal with merging stores of different sizes, so unless
16225 // we improve the chains of all the potential candidates up-front
16226 // before running MergeConsecutiveStores, it might only see some of
16227 // the nodes that will eventually be candidates, and then not be able
16228 // to go from a partially-merged state to the desired final
16229 // fully-merged state.
16230 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
16231   // This holds the base pointer, index, and the offset in bytes from the base
16232   // pointer.
16233   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
16234 
16235   // We must have a base and an offset.
16236   if (!BasePtr.Base.getNode())
16237     return false;
16238 
16239   // Do not handle stores to undef base pointers.
16240   if (BasePtr.Base.isUndef())
16241     return false;
16242 
16243   SmallVector<StoreSDNode *, 8> ChainedStores;
16244   ChainedStores.push_back(St);
16245 
16246   // Walk up the chain and look for nodes with offsets from the same
16247   // base pointer. Stop when reaching an instruction with a different kind
16248   // or instruction which has a different base pointer.
16249   StoreSDNode *Index = St;
16250   while (Index) {
16251     // If the chain has more than one use, then we can't reorder the mem ops.
16252     if (Index != St && !SDValue(Index, 0)->hasOneUse())
16253       break;
16254 
16255     if (Index->isVolatile() || Index->isIndexed())
16256       break;
16257 
16258     // Find the base pointer and offset for this memory node.
16259     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
16260 
16261     // Check that the base pointer is the same as the original one.
16262     if (!Ptr.equalBaseIndex(BasePtr))
16263       break;
16264 
16265     // Walk up the chain to find the next store node, ignoring any
16266     // intermediate loads. Any other kind of node will halt the loop.
16267     SDNode *NextInChain = Index->getChain().getNode();
16268     while (true) {
16269       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
16270         // We found a store node. Use it for the next iteration.
16271         if (STn->isVolatile() || STn->isIndexed()) {
16272           Index = nullptr;
16273           break;
16274         }
16275         ChainedStores.push_back(STn);
16276         Index = STn;
16277         break;
16278       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
16279         NextInChain = Ldn->getChain().getNode();
16280         continue;
16281       } else {
16282         Index = nullptr;
16283         break;
16284       }
16285     } // end while
16286   }
16287 
16288   // At this point, ChainedStores lists all of the Store nodes
16289   // reachable by iterating up through chain nodes matching the above
16290   // conditions.  For each such store identified, try to find an
16291   // earlier chain to attach the store to which won't violate the
16292   // required ordering.
16293   bool MadeChangeToSt = false;
16294   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
16295 
16296   for (StoreSDNode *ChainedStore : ChainedStores) {
16297     SDValue Chain = ChainedStore->getChain();
16298     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
16299 
16300     if (Chain != BetterChain) {
16301       if (ChainedStore == St)
16302         MadeChangeToSt = true;
16303       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
16304     }
16305   }
16306 
16307   // Do all replacements after finding the replacements to make to avoid making
16308   // the chains more complicated by introducing new TokenFactors.
16309   for (auto Replacement : BetterChains)
16310     replaceStoreChain(Replacement.first, Replacement.second);
16311 
16312   return MadeChangeToSt;
16313 }
16314 
16315 /// This is the entry point for the file.
16316 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
16317                            CodeGenOpt::Level OptLevel) {
16318   /// This is the main entry point to this class.
16319   DAGCombiner(*this, AA, OptLevel).Run(Level);
16320 }
16321