1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Target/TargetLowering.h"
39 #include "llvm/Target/TargetOptions.h"
40 #include "llvm/Target/TargetRegisterInfo.h"
41 #include "llvm/Target/TargetSubtargetInfo.h"
42 #include <algorithm>
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "dagcombine"
46 
47 STATISTIC(NodesCombined   , "Number of dag nodes combined");
48 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
49 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
50 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
51 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
52 STATISTIC(SlicedLoads, "Number of load sliced");
53 
54 namespace {
55   static cl::opt<bool>
56     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
57                cl::desc("Enable DAG combiner's use of IR alias analysis"));
58 
59   static cl::opt<bool>
60     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
61                cl::desc("Enable DAG combiner's use of TBAA"));
62 
63 #ifndef NDEBUG
64   static cl::opt<std::string>
65     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
66                cl::desc("Only use DAG-combiner alias analysis in this"
67                         " function"));
68 #endif
69 
70   /// Hidden option to stress test load slicing, i.e., when this option
71   /// is enabled, load slicing bypasses most of its profitability guards.
72   static cl::opt<bool>
73   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
74                     cl::desc("Bypass the profitability model of load "
75                              "slicing"),
76                     cl::init(false));
77 
78   static cl::opt<bool>
79     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
80                       cl::desc("DAG combiner may split indexing from loads"));
81 
82 //------------------------------ DAGCombiner ---------------------------------//
83 
84   class DAGCombiner {
85     SelectionDAG &DAG;
86     const TargetLowering &TLI;
87     CombineLevel Level;
88     CodeGenOpt::Level OptLevel;
89     bool LegalOperations;
90     bool LegalTypes;
91     bool ForCodeSize;
92 
93     /// \brief Worklist of all of the nodes that need to be simplified.
94     ///
95     /// This must behave as a stack -- new nodes to process are pushed onto the
96     /// back and when processing we pop off of the back.
97     ///
98     /// The worklist will not contain duplicates but may contain null entries
99     /// due to nodes being deleted from the underlying DAG.
100     SmallVector<SDNode *, 64> Worklist;
101 
102     /// \brief Mapping from an SDNode to its position on the worklist.
103     ///
104     /// This is used to find and remove nodes from the worklist (by nulling
105     /// them) when they are deleted from the underlying DAG. It relies on
106     /// stable indices of nodes within the worklist.
107     DenseMap<SDNode *, unsigned> WorklistMap;
108 
109     /// \brief Set of nodes which have been combined (at least once).
110     ///
111     /// This is used to allow us to reliably add any operands of a DAG node
112     /// which have not yet been combined to the worklist.
113     SmallPtrSet<SDNode *, 32> CombinedNodes;
114 
115     // AA - Used for DAG load/store alias analysis.
116     AliasAnalysis &AA;
117 
118     /// When an instruction is simplified, add all users of the instruction to
119     /// the work lists because they might get more simplified now.
120     void AddUsersToWorklist(SDNode *N) {
121       for (SDNode *Node : N->uses())
122         AddToWorklist(Node);
123     }
124 
125     /// Call the node-specific routine that folds each particular type of node.
126     SDValue visit(SDNode *N);
127 
128   public:
129     /// Add to the worklist making sure its instance is at the back (next to be
130     /// processed.)
131     void AddToWorklist(SDNode *N) {
132       assert(N->getOpcode() != ISD::DELETED_NODE &&
133              "Deleted Node added to Worklist");
134 
135       // Skip handle nodes as they can't usefully be combined and confuse the
136       // zero-use deletion strategy.
137       if (N->getOpcode() == ISD::HANDLENODE)
138         return;
139 
140       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
141         Worklist.push_back(N);
142     }
143 
144     /// Remove all instances of N from the worklist.
145     void removeFromWorklist(SDNode *N) {
146       CombinedNodes.erase(N);
147 
148       auto It = WorklistMap.find(N);
149       if (It == WorklistMap.end())
150         return; // Not in the worklist.
151 
152       // Null out the entry rather than erasing it to avoid a linear operation.
153       Worklist[It->second] = nullptr;
154       WorklistMap.erase(It);
155     }
156 
157     void deleteAndRecombine(SDNode *N);
158     bool recursivelyDeleteUnusedNodes(SDNode *N);
159 
160     /// Replaces all uses of the results of one DAG node with new values.
161     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
162                       bool AddTo = true);
163 
164     /// Replaces all uses of the results of one DAG node with new values.
165     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
166       return CombineTo(N, &Res, 1, AddTo);
167     }
168 
169     /// Replaces all uses of the results of one DAG node with new values.
170     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
171                       bool AddTo = true) {
172       SDValue To[] = { Res0, Res1 };
173       return CombineTo(N, To, 2, AddTo);
174     }
175 
176     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
177 
178   private:
179     unsigned MaximumLegalStoreInBits;
180 
181     /// Check the specified integer node value to see if it can be simplified or
182     /// if things it uses can be simplified by bit propagation.
183     /// If so, return true.
184     bool SimplifyDemandedBits(SDValue Op) {
185       unsigned BitWidth = Op.getScalarValueSizeInBits();
186       APInt Demanded = APInt::getAllOnesValue(BitWidth);
187       return SimplifyDemandedBits(Op, Demanded);
188     }
189 
190     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
191 
192     bool CombineToPreIndexedLoadStore(SDNode *N);
193     bool CombineToPostIndexedLoadStore(SDNode *N);
194     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
195     bool SliceUpLoad(SDNode *N);
196 
197     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
198     ///   load.
199     ///
200     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
201     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
202     /// \param EltNo index of the vector element to load.
203     /// \param OriginalLoad load that EVE came from to be replaced.
204     /// \returns EVE on success SDValue() on failure.
205     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
206         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
207     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
208     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
209     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
210     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
211     SDValue PromoteIntBinOp(SDValue Op);
212     SDValue PromoteIntShiftOp(SDValue Op);
213     SDValue PromoteExtend(SDValue Op);
214     bool PromoteLoad(SDValue Op);
215 
216     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
217                          SDValue ExtLoad, const SDLoc &DL,
218                          ISD::NodeType ExtType);
219 
220     /// Call the node-specific routine that knows how to fold each
221     /// particular type of node. If that doesn't do anything, try the
222     /// target-specific DAG combines.
223     SDValue combine(SDNode *N);
224 
225     // Visitation implementation - Implement dag node combining for different
226     // node types.  The semantics are as follows:
227     // Return Value:
228     //   SDValue.getNode() == 0 - No change was made
229     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
230     //   otherwise              - N should be replaced by the returned Operand.
231     //
232     SDValue visitTokenFactor(SDNode *N);
233     SDValue visitMERGE_VALUES(SDNode *N);
234     SDValue visitADD(SDNode *N);
235     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
236     SDValue visitSUB(SDNode *N);
237     SDValue visitADDC(SDNode *N);
238     SDValue visitUADDO(SDNode *N);
239     SDValue visitSUBC(SDNode *N);
240     SDValue visitUSUBO(SDNode *N);
241     SDValue visitADDE(SDNode *N);
242     SDValue visitSUBE(SDNode *N);
243     SDValue visitMUL(SDNode *N);
244     SDValue useDivRem(SDNode *N);
245     SDValue visitSDIV(SDNode *N);
246     SDValue visitUDIV(SDNode *N);
247     SDValue visitREM(SDNode *N);
248     SDValue visitMULHU(SDNode *N);
249     SDValue visitMULHS(SDNode *N);
250     SDValue visitSMUL_LOHI(SDNode *N);
251     SDValue visitUMUL_LOHI(SDNode *N);
252     SDValue visitSMULO(SDNode *N);
253     SDValue visitUMULO(SDNode *N);
254     SDValue visitIMINMAX(SDNode *N);
255     SDValue visitAND(SDNode *N);
256     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
257     SDValue visitOR(SDNode *N);
258     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
259     SDValue visitXOR(SDNode *N);
260     SDValue SimplifyVBinOp(SDNode *N);
261     SDValue visitSHL(SDNode *N);
262     SDValue visitSRA(SDNode *N);
263     SDValue visitSRL(SDNode *N);
264     SDValue visitRotate(SDNode *N);
265     SDValue visitABS(SDNode *N);
266     SDValue visitBSWAP(SDNode *N);
267     SDValue visitBITREVERSE(SDNode *N);
268     SDValue visitCTLZ(SDNode *N);
269     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
270     SDValue visitCTTZ(SDNode *N);
271     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
272     SDValue visitCTPOP(SDNode *N);
273     SDValue visitSELECT(SDNode *N);
274     SDValue visitVSELECT(SDNode *N);
275     SDValue visitSELECT_CC(SDNode *N);
276     SDValue visitSETCC(SDNode *N);
277     SDValue visitSETCCE(SDNode *N);
278     SDValue visitSIGN_EXTEND(SDNode *N);
279     SDValue visitZERO_EXTEND(SDNode *N);
280     SDValue visitANY_EXTEND(SDNode *N);
281     SDValue visitAssertZext(SDNode *N);
282     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
283     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
284     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
285     SDValue visitTRUNCATE(SDNode *N);
286     SDValue visitBITCAST(SDNode *N);
287     SDValue visitBUILD_PAIR(SDNode *N);
288     SDValue visitFADD(SDNode *N);
289     SDValue visitFSUB(SDNode *N);
290     SDValue visitFMUL(SDNode *N);
291     SDValue visitFMA(SDNode *N);
292     SDValue visitFDIV(SDNode *N);
293     SDValue visitFREM(SDNode *N);
294     SDValue visitFSQRT(SDNode *N);
295     SDValue visitFCOPYSIGN(SDNode *N);
296     SDValue visitSINT_TO_FP(SDNode *N);
297     SDValue visitUINT_TO_FP(SDNode *N);
298     SDValue visitFP_TO_SINT(SDNode *N);
299     SDValue visitFP_TO_UINT(SDNode *N);
300     SDValue visitFP_ROUND(SDNode *N);
301     SDValue visitFP_ROUND_INREG(SDNode *N);
302     SDValue visitFP_EXTEND(SDNode *N);
303     SDValue visitFNEG(SDNode *N);
304     SDValue visitFABS(SDNode *N);
305     SDValue visitFCEIL(SDNode *N);
306     SDValue visitFTRUNC(SDNode *N);
307     SDValue visitFFLOOR(SDNode *N);
308     SDValue visitFMINNUM(SDNode *N);
309     SDValue visitFMAXNUM(SDNode *N);
310     SDValue visitBRCOND(SDNode *N);
311     SDValue visitBR_CC(SDNode *N);
312     SDValue visitLOAD(SDNode *N);
313 
314     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
315     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
316 
317     SDValue visitSTORE(SDNode *N);
318     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
319     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
320     SDValue visitBUILD_VECTOR(SDNode *N);
321     SDValue visitCONCAT_VECTORS(SDNode *N);
322     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
323     SDValue visitVECTOR_SHUFFLE(SDNode *N);
324     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
325     SDValue visitINSERT_SUBVECTOR(SDNode *N);
326     SDValue visitMLOAD(SDNode *N);
327     SDValue visitMSTORE(SDNode *N);
328     SDValue visitMGATHER(SDNode *N);
329     SDValue visitMSCATTER(SDNode *N);
330     SDValue visitFP_TO_FP16(SDNode *N);
331     SDValue visitFP16_TO_FP(SDNode *N);
332 
333     SDValue visitFADDForFMACombine(SDNode *N);
334     SDValue visitFSUBForFMACombine(SDNode *N);
335     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
336 
337     SDValue XformToShuffleWithZero(SDNode *N);
338     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
339                            SDValue RHS);
340 
341     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
342 
343     SDValue foldSelectOfConstants(SDNode *N);
344     SDValue foldBinOpIntoSelect(SDNode *BO);
345     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
346     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
347     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
348     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
349                              SDValue N2, SDValue N3, ISD::CondCode CC,
350                              bool NotExtCompare = false);
351     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
352                                    SDValue N2, SDValue N3, ISD::CondCode CC);
353     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
354                               const SDLoc &DL);
355     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
356                           const SDLoc &DL, bool foldBooleans = true);
357 
358     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
359                            SDValue &CC) const;
360     bool isOneUseSetCC(SDValue N) const;
361 
362     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
363                                          unsigned HiOp);
364     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
365     SDValue CombineExtLoad(SDNode *N);
366     SDValue combineRepeatedFPDivisors(SDNode *N);
367     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
368     SDValue BuildSDIV(SDNode *N);
369     SDValue BuildSDIVPow2(SDNode *N);
370     SDValue BuildUDIV(SDNode *N);
371     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
372     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
373     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
374     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
375     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
376     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
377                                 SDNodeFlags *Flags, bool Reciprocal);
378     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
379                                 SDNodeFlags *Flags, bool Reciprocal);
380     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
381                                bool DemandHighBits = true);
382     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
383     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
384                               SDValue InnerPos, SDValue InnerNeg,
385                               unsigned PosOpcode, unsigned NegOpcode,
386                               const SDLoc &DL);
387     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
388     SDValue MatchLoadCombine(SDNode *N);
389     SDValue ReduceLoadWidth(SDNode *N);
390     SDValue ReduceLoadOpStoreWidth(SDNode *N);
391     SDValue splitMergedValStore(StoreSDNode *ST);
392     SDValue TransformFPLoadStorePair(SDNode *N);
393     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
394     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
395     SDValue reduceBuildVecToShuffle(SDNode *N);
396     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
397                                   ArrayRef<int> VectorMask, SDValue VecIn1,
398                                   SDValue VecIn2, unsigned LeftIdx);
399 
400     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
401 
402     /// Walk up chain skipping non-aliasing memory nodes,
403     /// looking for aliasing nodes and adding them to the Aliases vector.
404     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
405                           SmallVectorImpl<SDValue> &Aliases);
406 
407     /// Return true if there is any possibility that the two addresses overlap.
408     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
409 
410     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
411     /// chain (aliasing node.)
412     SDValue FindBetterChain(SDNode *N, SDValue Chain);
413 
414     /// Try to replace a store and any possibly adjacent stores on
415     /// consecutive chains with better chains. Return true only if St is
416     /// replaced.
417     ///
418     /// Notice that other chains may still be replaced even if the function
419     /// returns false.
420     bool findBetterNeighborChains(StoreSDNode *St);
421 
422     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
423     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
424 
425     /// Holds a pointer to an LSBaseSDNode as well as information on where it
426     /// is located in a sequence of memory operations connected by a chain.
427     struct MemOpLink {
428       MemOpLink(LSBaseSDNode *N, int64_t Offset)
429           : MemNode(N), OffsetFromBase(Offset) {}
430       // Ptr to the mem node.
431       LSBaseSDNode *MemNode;
432       // Offset from the base ptr.
433       int64_t OffsetFromBase;
434     };
435 
436     /// This is a helper function for visitMUL to check the profitability
437     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
438     /// MulNode is the original multiply, AddNode is (add x, c1),
439     /// and ConstNode is c2.
440     bool isMulAddWithConstProfitable(SDNode *MulNode,
441                                      SDValue &AddNode,
442                                      SDValue &ConstNode);
443 
444 
445     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
446     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
447     /// the type of the loaded value to be extended.  LoadedVT returns the type
448     /// of the original loaded value.  NarrowLoad returns whether the load would
449     /// need to be narrowed in order to match.
450     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
451                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
452                           bool &NarrowLoad);
453 
454     /// This is a helper function for MergeConsecutiveStores. When the source
455     /// elements of the consecutive stores are all constants or all extracted
456     /// vector elements, try to merge them into one larger store.
457     /// \return True if a merged store was created.
458     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
459                                          EVT MemVT, unsigned NumStores,
460                                          bool IsConstantSrc, bool UseVector);
461 
462     /// This is a helper function for MergeConsecutiveStores.
463     /// Stores that may be merged are placed in StoreNodes.
464     void getStoreMergeCandidates(StoreSDNode *St,
465                                  SmallVectorImpl<MemOpLink> &StoreNodes);
466 
467     /// Helper function for MergeConsecutiveStores. Checks if
468     /// Candidate stores have indirect dependency through their
469     /// operands. \return True if safe to merge
470     bool checkMergeStoreCandidatesForDependencies(
471         SmallVectorImpl<MemOpLink> &StoreNodes);
472 
473     /// Merge consecutive store operations into a wide store.
474     /// This optimization uses wide integers or vectors when possible.
475     /// \return number of stores that were merged into a merged store (the
476     /// affected nodes are stored as a prefix in \p StoreNodes).
477     bool MergeConsecutiveStores(StoreSDNode *N);
478 
479     /// \brief Try to transform a truncation where C is a constant:
480     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
481     ///
482     /// \p N needs to be a truncation and its first operand an AND. Other
483     /// requirements are checked by the function (e.g. that trunc is
484     /// single-use) and if missed an empty SDValue is returned.
485     SDValue distributeTruncateThroughAnd(SDNode *N);
486 
487   public:
488     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
489         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
490           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
491       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
492 
493       MaximumLegalStoreInBits = 0;
494       for (MVT VT : MVT::all_valuetypes())
495         if (EVT(VT).isSimple() && VT != MVT::Other &&
496             TLI.isTypeLegal(EVT(VT)) &&
497             VT.getSizeInBits() >= MaximumLegalStoreInBits)
498           MaximumLegalStoreInBits = VT.getSizeInBits();
499     }
500 
501     /// Runs the dag combiner on all nodes in the work list
502     void Run(CombineLevel AtLevel);
503 
504     SelectionDAG &getDAG() const { return DAG; }
505 
506     /// Returns a type large enough to hold any valid shift amount - before type
507     /// legalization these can be huge.
508     EVT getShiftAmountTy(EVT LHSTy) {
509       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
510       if (LHSTy.isVector())
511         return LHSTy;
512       auto &DL = DAG.getDataLayout();
513       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
514                         : TLI.getPointerTy(DL);
515     }
516 
517     /// This method returns true if we are running before type legalization or
518     /// if the specified VT is legal.
519     bool isTypeLegal(const EVT &VT) {
520       if (!LegalTypes) return true;
521       return TLI.isTypeLegal(VT);
522     }
523 
524     /// Convenience wrapper around TargetLowering::getSetCCResultType
525     EVT getSetCCResultType(EVT VT) const {
526       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
527     }
528   };
529 }
530 
531 
532 namespace {
533 /// This class is a DAGUpdateListener that removes any deleted
534 /// nodes from the worklist.
535 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
536   DAGCombiner &DC;
537 public:
538   explicit WorklistRemover(DAGCombiner &dc)
539     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
540 
541   void NodeDeleted(SDNode *N, SDNode *E) override {
542     DC.removeFromWorklist(N);
543   }
544 };
545 }
546 
547 //===----------------------------------------------------------------------===//
548 //  TargetLowering::DAGCombinerInfo implementation
549 //===----------------------------------------------------------------------===//
550 
551 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
552   ((DAGCombiner*)DC)->AddToWorklist(N);
553 }
554 
555 SDValue TargetLowering::DAGCombinerInfo::
556 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
557   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
558 }
559 
560 SDValue TargetLowering::DAGCombinerInfo::
561 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
562   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
563 }
564 
565 
566 SDValue TargetLowering::DAGCombinerInfo::
567 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
568   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
569 }
570 
571 void TargetLowering::DAGCombinerInfo::
572 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
573   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
574 }
575 
576 //===----------------------------------------------------------------------===//
577 // Helper Functions
578 //===----------------------------------------------------------------------===//
579 
580 void DAGCombiner::deleteAndRecombine(SDNode *N) {
581   removeFromWorklist(N);
582 
583   // If the operands of this node are only used by the node, they will now be
584   // dead. Make sure to re-visit them and recursively delete dead nodes.
585   for (const SDValue &Op : N->ops())
586     // For an operand generating multiple values, one of the values may
587     // become dead allowing further simplification (e.g. split index
588     // arithmetic from an indexed load).
589     if (Op->hasOneUse() || Op->getNumValues() > 1)
590       AddToWorklist(Op.getNode());
591 
592   DAG.DeleteNode(N);
593 }
594 
595 /// Return 1 if we can compute the negated form of the specified expression for
596 /// the same cost as the expression itself, or 2 if we can compute the negated
597 /// form more cheaply than the expression itself.
598 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
599                                const TargetLowering &TLI,
600                                const TargetOptions *Options,
601                                unsigned Depth = 0) {
602   // fneg is removable even if it has multiple uses.
603   if (Op.getOpcode() == ISD::FNEG) return 2;
604 
605   // Don't allow anything with multiple uses.
606   if (!Op.hasOneUse()) return 0;
607 
608   // Don't recurse exponentially.
609   if (Depth > 6) return 0;
610 
611   switch (Op.getOpcode()) {
612   default: return false;
613   case ISD::ConstantFP: {
614     if (!LegalOperations)
615       return 1;
616 
617     // Don't invert constant FP values after legalization unless the target says
618     // the negated constant is legal.
619     EVT VT = Op.getValueType();
620     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
621       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
622   }
623   case ISD::FADD:
624     // FIXME: determine better conditions for this xform.
625     if (!Options->UnsafeFPMath) return 0;
626 
627     // After operation legalization, it might not be legal to create new FSUBs.
628     if (LegalOperations &&
629         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
630       return 0;
631 
632     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
633     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
634                                     Options, Depth + 1))
635       return V;
636     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
637     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
638                               Depth + 1);
639   case ISD::FSUB:
640     // We can't turn -(A-B) into B-A when we honor signed zeros.
641     if (!Options->NoSignedZerosFPMath &&
642         !Op.getNode()->getFlags()->hasNoSignedZeros())
643       return 0;
644 
645     // fold (fneg (fsub A, B)) -> (fsub B, A)
646     return 1;
647 
648   case ISD::FMUL:
649   case ISD::FDIV:
650     if (Options->HonorSignDependentRoundingFPMath()) return 0;
651 
652     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
653     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
654                                     Options, Depth + 1))
655       return V;
656 
657     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
658                               Depth + 1);
659 
660   case ISD::FP_EXTEND:
661   case ISD::FP_ROUND:
662   case ISD::FSIN:
663     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
664                               Depth + 1);
665   }
666 }
667 
668 /// If isNegatibleForFree returns true, return the newly negated expression.
669 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
670                                     bool LegalOperations, unsigned Depth = 0) {
671   const TargetOptions &Options = DAG.getTarget().Options;
672   // fneg is removable even if it has multiple uses.
673   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
674 
675   // Don't allow anything with multiple uses.
676   assert(Op.hasOneUse() && "Unknown reuse!");
677 
678   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
679 
680   const SDNodeFlags *Flags = Op.getNode()->getFlags();
681 
682   switch (Op.getOpcode()) {
683   default: llvm_unreachable("Unknown code");
684   case ISD::ConstantFP: {
685     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
686     V.changeSign();
687     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
688   }
689   case ISD::FADD:
690     // FIXME: determine better conditions for this xform.
691     assert(Options.UnsafeFPMath);
692 
693     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
694     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
695                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
696       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
697                          GetNegatedExpression(Op.getOperand(0), DAG,
698                                               LegalOperations, Depth+1),
699                          Op.getOperand(1), Flags);
700     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
701     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
702                        GetNegatedExpression(Op.getOperand(1), DAG,
703                                             LegalOperations, Depth+1),
704                        Op.getOperand(0), Flags);
705   case ISD::FSUB:
706     // fold (fneg (fsub 0, B)) -> B
707     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
708       if (N0CFP->isZero())
709         return Op.getOperand(1);
710 
711     // fold (fneg (fsub A, B)) -> (fsub B, A)
712     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
713                        Op.getOperand(1), Op.getOperand(0), Flags);
714 
715   case ISD::FMUL:
716   case ISD::FDIV:
717     assert(!Options.HonorSignDependentRoundingFPMath());
718 
719     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
720     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
721                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
722       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
723                          GetNegatedExpression(Op.getOperand(0), DAG,
724                                               LegalOperations, Depth+1),
725                          Op.getOperand(1), Flags);
726 
727     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
728     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
729                        Op.getOperand(0),
730                        GetNegatedExpression(Op.getOperand(1), DAG,
731                                             LegalOperations, Depth+1), Flags);
732 
733   case ISD::FP_EXTEND:
734   case ISD::FSIN:
735     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
736                        GetNegatedExpression(Op.getOperand(0), DAG,
737                                             LegalOperations, Depth+1));
738   case ISD::FP_ROUND:
739       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
740                          GetNegatedExpression(Op.getOperand(0), DAG,
741                                               LegalOperations, Depth+1),
742                          Op.getOperand(1));
743   }
744 }
745 
746 // APInts must be the same size for most operations, this helper
747 // function zero extends the shorter of the pair so that they match.
748 // We provide an Offset so that we can create bitwidths that won't overflow.
749 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
750   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
751   LHS = LHS.zextOrSelf(Bits);
752   RHS = RHS.zextOrSelf(Bits);
753 }
754 
755 // Return true if this node is a setcc, or is a select_cc
756 // that selects between the target values used for true and false, making it
757 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
758 // the appropriate nodes based on the type of node we are checking. This
759 // simplifies life a bit for the callers.
760 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
761                                     SDValue &CC) const {
762   if (N.getOpcode() == ISD::SETCC) {
763     LHS = N.getOperand(0);
764     RHS = N.getOperand(1);
765     CC  = N.getOperand(2);
766     return true;
767   }
768 
769   if (N.getOpcode() != ISD::SELECT_CC ||
770       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
771       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
772     return false;
773 
774   if (TLI.getBooleanContents(N.getValueType()) ==
775       TargetLowering::UndefinedBooleanContent)
776     return false;
777 
778   LHS = N.getOperand(0);
779   RHS = N.getOperand(1);
780   CC  = N.getOperand(4);
781   return true;
782 }
783 
784 /// Return true if this is a SetCC-equivalent operation with only one use.
785 /// If this is true, it allows the users to invert the operation for free when
786 /// it is profitable to do so.
787 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
788   SDValue N0, N1, N2;
789   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
790     return true;
791   return false;
792 }
793 
794 // \brief Returns the SDNode if it is a constant float BuildVector
795 // or constant float.
796 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
797   if (isa<ConstantFPSDNode>(N))
798     return N.getNode();
799   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
800     return N.getNode();
801   return nullptr;
802 }
803 
804 // Determines if it is a constant integer or a build vector of constant
805 // integers (and undefs).
806 // Do not permit build vector implicit truncation.
807 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
808   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
809     return !(Const->isOpaque() && NoOpaques);
810   if (N.getOpcode() != ISD::BUILD_VECTOR)
811     return false;
812   unsigned BitWidth = N.getScalarValueSizeInBits();
813   for (const SDValue &Op : N->op_values()) {
814     if (Op.isUndef())
815       continue;
816     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
817     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
818         (Const->isOpaque() && NoOpaques))
819       return false;
820   }
821   return true;
822 }
823 
824 // Determines if it is a constant null integer or a splatted vector of a
825 // constant null integer (with no undefs).
826 // Build vector implicit truncation is not an issue for null values.
827 static bool isNullConstantOrNullSplatConstant(SDValue N) {
828   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
829     return Splat->isNullValue();
830   return false;
831 }
832 
833 // Determines if it is a constant integer of one or a splatted vector of a
834 // constant integer of one (with no undefs).
835 // Do not permit build vector implicit truncation.
836 static bool isOneConstantOrOneSplatConstant(SDValue N) {
837   unsigned BitWidth = N.getScalarValueSizeInBits();
838   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
839     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
840   return false;
841 }
842 
843 // Determines if it is a constant integer of all ones or a splatted vector of a
844 // constant integer of all ones (with no undefs).
845 // Do not permit build vector implicit truncation.
846 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
847   unsigned BitWidth = N.getScalarValueSizeInBits();
848   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
849     return Splat->isAllOnesValue() &&
850            Splat->getAPIntValue().getBitWidth() == BitWidth;
851   return false;
852 }
853 
854 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
855 // undef's.
856 static bool isAnyConstantBuildVector(const SDNode *N) {
857   return ISD::isBuildVectorOfConstantSDNodes(N) ||
858          ISD::isBuildVectorOfConstantFPSDNodes(N);
859 }
860 
861 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
862                                     SDValue N1) {
863   EVT VT = N0.getValueType();
864   if (N0.getOpcode() == Opc) {
865     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
866       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
867         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
868         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
869           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
870         return SDValue();
871       }
872       if (N0.hasOneUse()) {
873         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
874         // use
875         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
876         if (!OpNode.getNode())
877           return SDValue();
878         AddToWorklist(OpNode.getNode());
879         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
880       }
881     }
882   }
883 
884   if (N1.getOpcode() == Opc) {
885     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
886       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
887         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
888         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
889           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
890         return SDValue();
891       }
892       if (N1.hasOneUse()) {
893         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
894         // use
895         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
896         if (!OpNode.getNode())
897           return SDValue();
898         AddToWorklist(OpNode.getNode());
899         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
900       }
901     }
902   }
903 
904   return SDValue();
905 }
906 
907 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
908                                bool AddTo) {
909   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
910   ++NodesCombined;
911   DEBUG(dbgs() << "\nReplacing.1 ";
912         N->dump(&DAG);
913         dbgs() << "\nWith: ";
914         To[0].getNode()->dump(&DAG);
915         dbgs() << " and " << NumTo-1 << " other values\n");
916   for (unsigned i = 0, e = NumTo; i != e; ++i)
917     assert((!To[i].getNode() ||
918             N->getValueType(i) == To[i].getValueType()) &&
919            "Cannot combine value to value of different type!");
920 
921   WorklistRemover DeadNodes(*this);
922   DAG.ReplaceAllUsesWith(N, To);
923   if (AddTo) {
924     // Push the new nodes and any users onto the worklist
925     for (unsigned i = 0, e = NumTo; i != e; ++i) {
926       if (To[i].getNode()) {
927         AddToWorklist(To[i].getNode());
928         AddUsersToWorklist(To[i].getNode());
929       }
930     }
931   }
932 
933   // Finally, if the node is now dead, remove it from the graph.  The node
934   // may not be dead if the replacement process recursively simplified to
935   // something else needing this node.
936   if (N->use_empty())
937     deleteAndRecombine(N);
938   return SDValue(N, 0);
939 }
940 
941 void DAGCombiner::
942 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
943   // Replace all uses.  If any nodes become isomorphic to other nodes and
944   // are deleted, make sure to remove them from our worklist.
945   WorklistRemover DeadNodes(*this);
946   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
947 
948   // Push the new node and any (possibly new) users onto the worklist.
949   AddToWorklist(TLO.New.getNode());
950   AddUsersToWorklist(TLO.New.getNode());
951 
952   // Finally, if the node is now dead, remove it from the graph.  The node
953   // may not be dead if the replacement process recursively simplified to
954   // something else needing this node.
955   if (TLO.Old.getNode()->use_empty())
956     deleteAndRecombine(TLO.Old.getNode());
957 }
958 
959 /// Check the specified integer node value to see if it can be simplified or if
960 /// things it uses can be simplified by bit propagation. If so, return true.
961 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
962   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
963   APInt KnownZero, KnownOne;
964   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
965     return false;
966 
967   // Revisit the node.
968   AddToWorklist(Op.getNode());
969 
970   // Replace the old value with the new one.
971   ++NodesCombined;
972   DEBUG(dbgs() << "\nReplacing.2 ";
973         TLO.Old.getNode()->dump(&DAG);
974         dbgs() << "\nWith: ";
975         TLO.New.getNode()->dump(&DAG);
976         dbgs() << '\n');
977 
978   CommitTargetLoweringOpt(TLO);
979   return true;
980 }
981 
982 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
983   SDLoc DL(Load);
984   EVT VT = Load->getValueType(0);
985   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
986 
987   DEBUG(dbgs() << "\nReplacing.9 ";
988         Load->dump(&DAG);
989         dbgs() << "\nWith: ";
990         Trunc.getNode()->dump(&DAG);
991         dbgs() << '\n');
992   WorklistRemover DeadNodes(*this);
993   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
994   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
995   deleteAndRecombine(Load);
996   AddToWorklist(Trunc.getNode());
997 }
998 
999 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1000   Replace = false;
1001   SDLoc DL(Op);
1002   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1003     LoadSDNode *LD = cast<LoadSDNode>(Op);
1004     EVT MemVT = LD->getMemoryVT();
1005     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1006       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1007                                                        : ISD::EXTLOAD)
1008       : LD->getExtensionType();
1009     Replace = true;
1010     return DAG.getExtLoad(ExtType, DL, PVT,
1011                           LD->getChain(), LD->getBasePtr(),
1012                           MemVT, LD->getMemOperand());
1013   }
1014 
1015   unsigned Opc = Op.getOpcode();
1016   switch (Opc) {
1017   default: break;
1018   case ISD::AssertSext:
1019     return DAG.getNode(ISD::AssertSext, DL, PVT,
1020                        SExtPromoteOperand(Op.getOperand(0), PVT),
1021                        Op.getOperand(1));
1022   case ISD::AssertZext:
1023     return DAG.getNode(ISD::AssertZext, DL, PVT,
1024                        ZExtPromoteOperand(Op.getOperand(0), PVT),
1025                        Op.getOperand(1));
1026   case ISD::Constant: {
1027     unsigned ExtOpc =
1028       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1029     return DAG.getNode(ExtOpc, DL, PVT, Op);
1030   }
1031   }
1032 
1033   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1034     return SDValue();
1035   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1036 }
1037 
1038 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1039   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1040     return SDValue();
1041   EVT OldVT = Op.getValueType();
1042   SDLoc DL(Op);
1043   bool Replace = false;
1044   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1045   if (!NewOp.getNode())
1046     return SDValue();
1047   AddToWorklist(NewOp.getNode());
1048 
1049   if (Replace)
1050     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1051   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1052                      DAG.getValueType(OldVT));
1053 }
1054 
1055 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1056   EVT OldVT = Op.getValueType();
1057   SDLoc DL(Op);
1058   bool Replace = false;
1059   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1060   if (!NewOp.getNode())
1061     return SDValue();
1062   AddToWorklist(NewOp.getNode());
1063 
1064   if (Replace)
1065     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1066   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1067 }
1068 
1069 /// Promote the specified integer binary operation if the target indicates it is
1070 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1071 /// i32 since i16 instructions are longer.
1072 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1073   if (!LegalOperations)
1074     return SDValue();
1075 
1076   EVT VT = Op.getValueType();
1077   if (VT.isVector() || !VT.isInteger())
1078     return SDValue();
1079 
1080   // If operation type is 'undesirable', e.g. i16 on x86, consider
1081   // promoting it.
1082   unsigned Opc = Op.getOpcode();
1083   if (TLI.isTypeDesirableForOp(Opc, VT))
1084     return SDValue();
1085 
1086   EVT PVT = VT;
1087   // Consult target whether it is a good idea to promote this operation and
1088   // what's the right type to promote it to.
1089   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1090     assert(PVT != VT && "Don't know what type to promote to!");
1091 
1092     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1093 
1094     bool Replace0 = false;
1095     SDValue N0 = Op.getOperand(0);
1096     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1097 
1098     bool Replace1 = false;
1099     SDValue N1 = Op.getOperand(1);
1100     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1101     SDLoc DL(Op);
1102 
1103     SDValue RV =
1104         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1105 
1106     // New replace instances of N0 and N1
1107     if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
1108         NN0.getOpcode() != ISD::DELETED_NODE) {
1109       AddToWorklist(NN0.getNode());
1110       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1111     }
1112 
1113     if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
1114         NN1.getOpcode() != ISD::DELETED_NODE) {
1115       AddToWorklist(NN1.getNode());
1116       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1117     }
1118 
1119     // Deal with Op being deleted.
1120     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1121       return RV;
1122   }
1123   return SDValue();
1124 }
1125 
1126 /// Promote the specified integer shift operation if the target indicates it is
1127 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1128 /// i32 since i16 instructions are longer.
1129 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1130   if (!LegalOperations)
1131     return SDValue();
1132 
1133   EVT VT = Op.getValueType();
1134   if (VT.isVector() || !VT.isInteger())
1135     return SDValue();
1136 
1137   // If operation type is 'undesirable', e.g. i16 on x86, consider
1138   // promoting it.
1139   unsigned Opc = Op.getOpcode();
1140   if (TLI.isTypeDesirableForOp(Opc, VT))
1141     return SDValue();
1142 
1143   EVT PVT = VT;
1144   // Consult target whether it is a good idea to promote this operation and
1145   // what's the right type to promote it to.
1146   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1147     assert(PVT != VT && "Don't know what type to promote to!");
1148 
1149     DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1150 
1151     bool Replace = false;
1152     SDValue N0 = Op.getOperand(0);
1153     SDValue N1 = Op.getOperand(1);
1154     if (Opc == ISD::SRA)
1155       N0 = SExtPromoteOperand(N0, PVT);
1156     else if (Opc == ISD::SRL)
1157       N0 = ZExtPromoteOperand(N0, PVT);
1158     else
1159       N0 = PromoteOperand(N0, PVT, Replace);
1160 
1161     if (!N0.getNode())
1162       return SDValue();
1163 
1164     SDLoc DL(Op);
1165     SDValue RV =
1166         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1167 
1168     AddToWorklist(N0.getNode());
1169     if (Replace)
1170       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1171 
1172     // Deal with Op being deleted.
1173     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1174       return RV;
1175   }
1176   return SDValue();
1177 }
1178 
1179 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1180   if (!LegalOperations)
1181     return SDValue();
1182 
1183   EVT VT = Op.getValueType();
1184   if (VT.isVector() || !VT.isInteger())
1185     return SDValue();
1186 
1187   // If operation type is 'undesirable', e.g. i16 on x86, consider
1188   // promoting it.
1189   unsigned Opc = Op.getOpcode();
1190   if (TLI.isTypeDesirableForOp(Opc, VT))
1191     return SDValue();
1192 
1193   EVT PVT = VT;
1194   // Consult target whether it is a good idea to promote this operation and
1195   // what's the right type to promote it to.
1196   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1197     assert(PVT != VT && "Don't know what type to promote to!");
1198     // fold (aext (aext x)) -> (aext x)
1199     // fold (aext (zext x)) -> (zext x)
1200     // fold (aext (sext x)) -> (sext x)
1201     DEBUG(dbgs() << "\nPromoting ";
1202           Op.getNode()->dump(&DAG));
1203     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1204   }
1205   return SDValue();
1206 }
1207 
1208 bool DAGCombiner::PromoteLoad(SDValue Op) {
1209   if (!LegalOperations)
1210     return false;
1211 
1212   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1213     return false;
1214 
1215   EVT VT = Op.getValueType();
1216   if (VT.isVector() || !VT.isInteger())
1217     return false;
1218 
1219   // If operation type is 'undesirable', e.g. i16 on x86, consider
1220   // promoting it.
1221   unsigned Opc = Op.getOpcode();
1222   if (TLI.isTypeDesirableForOp(Opc, VT))
1223     return false;
1224 
1225   EVT PVT = VT;
1226   // Consult target whether it is a good idea to promote this operation and
1227   // what's the right type to promote it to.
1228   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1229     assert(PVT != VT && "Don't know what type to promote to!");
1230 
1231     SDLoc DL(Op);
1232     SDNode *N = Op.getNode();
1233     LoadSDNode *LD = cast<LoadSDNode>(N);
1234     EVT MemVT = LD->getMemoryVT();
1235     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1236       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1237                                                        : ISD::EXTLOAD)
1238       : LD->getExtensionType();
1239     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1240                                    LD->getChain(), LD->getBasePtr(),
1241                                    MemVT, LD->getMemOperand());
1242     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1243 
1244     DEBUG(dbgs() << "\nPromoting ";
1245           N->dump(&DAG);
1246           dbgs() << "\nTo: ";
1247           Result.getNode()->dump(&DAG);
1248           dbgs() << '\n');
1249     WorklistRemover DeadNodes(*this);
1250     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1251     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1252     deleteAndRecombine(N);
1253     AddToWorklist(Result.getNode());
1254     return true;
1255   }
1256   return false;
1257 }
1258 
1259 /// \brief Recursively delete a node which has no uses and any operands for
1260 /// which it is the only use.
1261 ///
1262 /// Note that this both deletes the nodes and removes them from the worklist.
1263 /// It also adds any nodes who have had a user deleted to the worklist as they
1264 /// may now have only one use and subject to other combines.
1265 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1266   if (!N->use_empty())
1267     return false;
1268 
1269   SmallSetVector<SDNode *, 16> Nodes;
1270   Nodes.insert(N);
1271   do {
1272     N = Nodes.pop_back_val();
1273     if (!N)
1274       continue;
1275 
1276     if (N->use_empty()) {
1277       for (const SDValue &ChildN : N->op_values())
1278         Nodes.insert(ChildN.getNode());
1279 
1280       removeFromWorklist(N);
1281       DAG.DeleteNode(N);
1282     } else {
1283       AddToWorklist(N);
1284     }
1285   } while (!Nodes.empty());
1286   return true;
1287 }
1288 
1289 //===----------------------------------------------------------------------===//
1290 //  Main DAG Combiner implementation
1291 //===----------------------------------------------------------------------===//
1292 
/// Run the combiner over the whole DAG at combine level \p AtLevel.
///
/// Every node of the DAG is seeded onto the worklist. Nodes are then popped
/// one at a time; a node without uses is deleted, a live node is (when running
/// after DAG legalization) re-legalized and then handed to combine(). A
/// non-null result that differs from the node replaces it, and the replacement
/// and its users are queued for another visit. Once the worklist is drained
/// the DAG root is refreshed and dead nodes are removed.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  // Emptiness is tested on WorklistMap because Worklist itself may still hold
  // null entries.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    // Scoped to this iteration: active while N is legalized, combined and
    // replaced below.
    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Everything legalization touched, and its users, needs another look.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Skip combining N when LegalizeOp reports it is no longer valid.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null result means nothing was combined for this node.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // With matching result counts the whole node is replaced; otherwise N must
    // have a single result, which is replaced by the value RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1397 
/// Dispatch \p N to the visit routine for its opcode and return that routine's
/// result. Several opcodes share one routine (e.g. SREM/UREM, the integer
/// min/max family, ROTL/ROTR). Opcodes without a case here take the default
/// branch and yield a null SDValue.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertZext:         return visitAssertZext(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  // No visit routine handles this opcode.
  return SDValue();
}
1500 
1501 SDValue DAGCombiner::combine(SDNode *N) {
1502   SDValue RV = visit(N);
1503 
1504   // If nothing happened, try a target-specific DAG combine.
1505   if (!RV.getNode()) {
1506     assert(N->getOpcode() != ISD::DELETED_NODE &&
1507            "Node was deleted but visit returned NULL!");
1508 
1509     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1510         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1511 
1512       // Expose the DAG combiner to the target combiner impls.
1513       TargetLowering::DAGCombinerInfo
1514         DagCombineInfo(DAG, Level, false, this);
1515 
1516       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1517     }
1518   }
1519 
1520   // If nothing happened still, try promoting the operation.
1521   if (!RV.getNode()) {
1522     switch (N->getOpcode()) {
1523     default: break;
1524     case ISD::ADD:
1525     case ISD::SUB:
1526     case ISD::MUL:
1527     case ISD::AND:
1528     case ISD::OR:
1529     case ISD::XOR:
1530       RV = PromoteIntBinOp(SDValue(N, 0));
1531       break;
1532     case ISD::SHL:
1533     case ISD::SRA:
1534     case ISD::SRL:
1535       RV = PromoteIntShiftOp(SDValue(N, 0));
1536       break;
1537     case ISD::SIGN_EXTEND:
1538     case ISD::ZERO_EXTEND:
1539     case ISD::ANY_EXTEND:
1540       RV = PromoteExtend(SDValue(N, 0));
1541       break;
1542     case ISD::LOAD:
1543       if (PromoteLoad(SDValue(N, 0)))
1544         RV = SDValue(N, 0);
1545       break;
1546     }
1547   }
1548 
1549   // If N is a commutative binary node, try commuting it to enable more
1550   // sdisel CSE.
1551   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1552       N->getNumValues() == 1) {
1553     SDValue N0 = N->getOperand(0);
1554     SDValue N1 = N->getOperand(1);
1555 
1556     // Constant operands are canonicalized to RHS.
1557     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1558       SDValue Ops[] = {N1, N0};
1559       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1560                                             N->getFlags());
1561       if (CSENode)
1562         return SDValue(CSENode, 0);
1563     }
1564   }
1565 
1566   return RV;
1567 }
1568 
1569 /// Given a node, return its input chain if it has one, otherwise return a null
1570 /// sd operand.
1571 static SDValue getInputChainForNode(SDNode *N) {
1572   if (unsigned NumOps = N->getNumOperands()) {
1573     if (N->getOperand(0).getValueType() == MVT::Other)
1574       return N->getOperand(0);
1575     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1576       return N->getOperand(NumOps-1);
1577     for (unsigned i = 1; i < NumOps-1; ++i)
1578       if (N->getOperand(i).getValueType() == MVT::Other)
1579         return N->getOperand(i);
1580   }
1581   return SDValue();
1582 }
1583 
/// Simplify a TokenFactor node.
///
/// Three things are attempted:
///  1. With exactly two operands, if one operand's input chain is the other
///     operand, the TokenFactor is replaced by the first of these.
///  2. Operands are flattened: single-use nested TokenFactors are merged into
///     this one, and EntryToken and duplicate operands are dropped.
///  3. Operands that are reached while walking up the chain of another
///     operand are pruned.
///
/// Returns the replacement produced by CombineTo if anything changed, or a
/// null SDValue otherwise.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Nodes that have been added to Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        // Only a single-use token factor that is not already queued is merged
        // into N; any other token factor is kept as an ordinary operand.
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes reached during the walk.
  bool DidPruneOps = false;

  // Seed the search with every operand; each starts with one pending work
  // item, and NumLeftToConsider ends up equal to the number of operands.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Queue chain node Op as part of the search that started from operand
  // number OpNumber. CurIdx is the worklist index currently being processed.
  // NOTE: this local lambda has the same name as the one-argument
  // AddToWorklist called in the flattening loop above; from here on the name
  // refers to the lambda.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      // Linear search for the index of Op within Ops.
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      // Transfer the pruned operand's outstanding work to OpNumber.
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The walk stops after at most 1024 worklist entries (presumably to bound
  // compile time on very long chains).
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to be able to prune anything.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered: the increment here offsets the decrement performed below
      // if this operand's work count drops to zero.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      // Continue the search through every operand of the token factor.
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // Operand 0 is followed as the chain for these nodes.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory nodes are followed through their chain; any other node ends
      // this branch of the search.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    // This worklist entry is done; retire the operand once it has no work left.
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached while walking up the
        // chains during the search above.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }

    // Add users to worklist, since we may introduce a lot of new
    // chained token factors while removing memory deps.
    return CombineTo(N, Result, true /*add to worklist*/);
  }

  // Nothing changed: Result is still the default (null) SDValue.
  return Result;
}
1747 
1748 /// MERGE_VALUES can always be eliminated.
1749 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1750   WorklistRemover DeadNodes(*this);
1751   // Replacing results may cause a different MERGE_VALUES to suddenly
1752   // be CSE'd with N, and carry its uses with it. Iterate until no
1753   // uses remain, to ensure that the node can be safely deleted.
1754   // First add the users of this node to the work list so that they
1755   // can be tried again once they have new operands.
1756   AddUsersToWorklist(N);
1757   do {
1758     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1759       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1760   } while (!N->use_empty());
1761   deleteAndRecombine(N);
1762   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1763 }
1764 
1765 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1766 /// ConstantSDNode pointer else nullptr.
1767 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1768   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1769   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1770 }
1771 
1772 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1773   auto BinOpcode = BO->getOpcode();
1774   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1775           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1776           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1777           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1778           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1779           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1780           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1781           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1782           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1783          "Unexpected binary operator");
1784 
1785   // Bail out if any constants are opaque because we can't constant fold those.
1786   SDValue C1 = BO->getOperand(1);
1787   if (!isConstantOrConstantVector(C1, true) &&
1788       !isConstantFPBuildVectorOrConstantFP(C1))
1789     return SDValue();
1790 
1791   // Don't do this unless the old select is going away. We want to eliminate the
1792   // binary operator, not replace a binop with a select.
1793   // TODO: Handle ISD::SELECT_CC.
1794   SDValue Sel = BO->getOperand(0);
1795   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1796     return SDValue();
1797 
1798   SDValue CT = Sel.getOperand(1);
1799   if (!isConstantOrConstantVector(CT, true) &&
1800       !isConstantFPBuildVectorOrConstantFP(CT))
1801     return SDValue();
1802 
1803   SDValue CF = Sel.getOperand(2);
1804   if (!isConstantOrConstantVector(CF, true) &&
1805       !isConstantFPBuildVectorOrConstantFP(CF))
1806     return SDValue();
1807 
1808   // We have a select-of-constants followed by a binary operator with a
1809   // constant. Eliminate the binop by pulling the constant math into the select.
1810   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1811   EVT VT = Sel.getValueType();
1812   SDLoc DL(Sel);
1813   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1814   assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1815           isConstantFPBuildVectorOrConstantFP(NewCT)) &&
1816          "Failed to constant fold a binop with constant operands");
1817 
1818   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1819   assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1820           isConstantFPBuildVectorOrConstantFP(NewCF)) &&
1821          "Failed to constant fold a binop with constant operands");
1822 
1823   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1824 }
1825 
/// Combine an ISD::ADD node.
///
/// The folds are tried in this order: vector simplification, undef and
/// constant folding/canonicalization, folding into a select of constants,
/// reassociation, a series of add/sub algebraic identities, demanded-bits
/// simplification, add -> or when the operands share no set bits, and finally
/// the operand-order-sensitive folds in visitADDLike for both operand orders.
/// Returns the replacement value, or a null SDValue if nothing applied.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // Both operands are constant at this point.
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    if (N0.getOpcode() == ISD::SUB)
      if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
        return DAG.getNode(ISD::SUB, DL, VT,
                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                           N0.getOperand(1));
      }
  }

  // fold (add (select Cond, CT, CF), C) into a select of constants
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // NOTE(review): N itself is returned on success, presumably because
  // SimplifyDemandedBits has already rewritten the node in place -- confirm
  // against its definition.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // The folds in visitADDLike depend on which operand is which, so try both
  // operand orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
1947 
1948 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
1949   EVT VT = N0.getValueType();
1950   SDLoc DL(LocReference);
1951 
1952   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1953   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
1954       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
1955     return DAG.getNode(ISD::SUB, DL, VT, N0,
1956                        DAG.getNode(ISD::SHL, DL, VT,
1957                                    N1.getOperand(0).getOperand(1),
1958                                    N1.getOperand(1)));
1959 
1960   if (N1.getOpcode() == ISD::AND) {
1961     SDValue AndOp0 = N1.getOperand(0);
1962     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1963     unsigned DestBits = VT.getScalarSizeInBits();
1964 
1965     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1966     // and similar xforms where the inner op is either ~0 or 0.
1967     if (NumSignBits == DestBits &&
1968         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
1969       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
1970   }
1971 
1972   // add (sext i1), X -> sub X, (zext i1)
1973   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1974       N0.getOperand(0).getValueType() == MVT::i1 &&
1975       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1976     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1977     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1978   }
1979 
1980   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
1981   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1982     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1983     if (TN->getVT() == MVT::i1) {
1984       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1985                                  DAG.getConstant(1, DL, VT));
1986       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
1987     }
1988   }
1989 
1990   return SDValue();
1991 }
1992 
1993 SDValue DAGCombiner::visitADDC(SDNode *N) {
1994   SDValue N0 = N->getOperand(0);
1995   SDValue N1 = N->getOperand(1);
1996   EVT VT = N0.getValueType();
1997   SDLoc DL(N);
1998 
1999   // If the flag result is dead, turn this into an ADD.
2000   if (!N->hasAnyUseOfValue(1))
2001     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2002                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2003 
2004   // canonicalize constant to RHS.
2005   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2006   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2007   if (N0C && !N1C)
2008     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2009 
2010   // fold (addc x, 0) -> x + no carry out
2011   if (isNullConstant(N1))
2012     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2013                                         DL, MVT::Glue));
2014 
2015   // If it cannot overflow, transform into an add.
2016   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2017     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2018                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2019 
2020   return SDValue();
2021 }
2022 
2023 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2024   SDValue N0 = N->getOperand(0);
2025   SDValue N1 = N->getOperand(1);
2026   EVT VT = N0.getValueType();
2027   if (VT.isVector())
2028     return SDValue();
2029 
2030   EVT CarryVT = N->getValueType(1);
2031   SDLoc DL(N);
2032 
2033   // If the flag result is dead, turn this into an ADD.
2034   if (!N->hasAnyUseOfValue(1))
2035     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2036                      DAG.getUNDEF(CarryVT));
2037 
2038   // canonicalize constant to RHS.
2039   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2040   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2041   if (N0C && !N1C)
2042     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2043 
2044   // fold (uaddo x, 0) -> x + no carry out
2045   if (isNullConstant(N1))
2046     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2047 
2048   // If it cannot overflow, transform into an add.
2049   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2050     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2051                      DAG.getConstant(0, DL, CarryVT));
2052 
2053   return SDValue();
2054 }
2055 
2056 SDValue DAGCombiner::visitADDE(SDNode *N) {
2057   SDValue N0 = N->getOperand(0);
2058   SDValue N1 = N->getOperand(1);
2059   SDValue CarryIn = N->getOperand(2);
2060 
2061   // canonicalize constant to RHS
2062   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2063   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2064   if (N0C && !N1C)
2065     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2066                        N1, N0, CarryIn);
2067 
2068   // fold (adde x, y, false) -> (addc x, y)
2069   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2070     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2071 
2072   return SDValue();
2073 }
2074 
2075 // Since it may not be valid to emit a fold to zero for vector initializers
2076 // check if we can before folding.
2077 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2078                              SelectionDAG &DAG, bool LegalOperations,
2079                              bool LegalTypes) {
2080   if (!VT.isVector())
2081     return DAG.getConstant(0, DL, VT);
2082   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2083     return DAG.getConstant(0, DL, VT);
2084   return SDValue();
2085 }
2086 
/// Combine an ISD::SUB node.
///
/// The folds are tried in this order: vector simplification, x-x and constant
/// folding, folding into a select of constants, (sub x, c) -> (add x, -c),
/// negation special cases (0 - x), (sub -1, x) -> not x, a series of add/sub
/// algebraic identities, undef propagation, global-address offset folding and
/// the sign_extend_inreg-of-i1 rewrite.
/// Returns the replacement value, or a null SDValue if nothing applied.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (sub (select Cond, CT, CF), C) into a select of constants
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Non-null only when N1 is a scalar, non-opaque constant.
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
  // From here on N1C is always null: the non-null case returned just above.

  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags()->hasNoUnsignedWrap())
      return N0;

    // True when every bit of N1 other than the sign bit is known to be zero.
    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags()->hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      // NOTE(review): N1C is always null here because the (sub x, c) fold
      // above returns whenever it is non-null, so this branch cannot be
      // taken as the function is currently ordered.
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
2234 
2235 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2236   SDValue N0 = N->getOperand(0);
2237   SDValue N1 = N->getOperand(1);
2238   EVT VT = N0.getValueType();
2239   SDLoc DL(N);
2240 
2241   // If the flag result is dead, turn this into an SUB.
2242   if (!N->hasAnyUseOfValue(1))
2243     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2244                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2245 
2246   // fold (subc x, x) -> 0 + no borrow
2247   if (N0 == N1)
2248     return CombineTo(N, DAG.getConstant(0, DL, VT),
2249                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2250 
2251   // fold (subc x, 0) -> x + no borrow
2252   if (isNullConstant(N1))
2253     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2254 
2255   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2256   if (isAllOnesConstant(N0))
2257     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2258                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2259 
2260   return SDValue();
2261 }
2262 
2263 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2264   SDValue N0 = N->getOperand(0);
2265   SDValue N1 = N->getOperand(1);
2266   EVT VT = N0.getValueType();
2267   if (VT.isVector())
2268     return SDValue();
2269 
2270   EVT CarryVT = N->getValueType(1);
2271   SDLoc DL(N);
2272 
2273   // If the flag result is dead, turn this into an SUB.
2274   if (!N->hasAnyUseOfValue(1))
2275     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2276                      DAG.getUNDEF(CarryVT));
2277 
2278   // fold (usubo x, x) -> 0 + no borrow
2279   if (N0 == N1)
2280     return CombineTo(N, DAG.getConstant(0, DL, VT),
2281                      DAG.getConstant(0, DL, CarryVT));
2282 
2283   // fold (usubo x, 0) -> x + no borrow
2284   if (isNullConstant(N1))
2285     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2286 
2287   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2288   if (isAllOnesConstant(N0))
2289     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2290                      DAG.getConstant(0, DL, CarryVT));
2291 
2292   return SDValue();
2293 }
2294 
2295 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2296   SDValue N0 = N->getOperand(0);
2297   SDValue N1 = N->getOperand(1);
2298   SDValue CarryIn = N->getOperand(2);
2299 
2300   // fold (sube x, y, false) -> (subc x, y)
2301   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2302     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2303 
2304   return SDValue();
2305 }
2306 
2307 SDValue DAGCombiner::visitMUL(SDNode *N) {
2308   SDValue N0 = N->getOperand(0);
2309   SDValue N1 = N->getOperand(1);
2310   EVT VT = N0.getValueType();
2311 
2312   // fold (mul x, undef) -> 0
2313   if (N0.isUndef() || N1.isUndef())
2314     return DAG.getConstant(0, SDLoc(N), VT);
2315 
2316   bool N0IsConst = false;
2317   bool N1IsConst = false;
2318   bool N1IsOpaqueConst = false;
2319   bool N0IsOpaqueConst = false;
2320   APInt ConstValue0, ConstValue1;
2321   // fold vector ops
2322   if (VT.isVector()) {
2323     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2324       return FoldedVOp;
2325 
2326     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2327     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2328   } else {
2329     N0IsConst = isa<ConstantSDNode>(N0);
2330     if (N0IsConst) {
2331       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2332       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2333     }
2334     N1IsConst = isa<ConstantSDNode>(N1);
2335     if (N1IsConst) {
2336       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2337       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2338     }
2339   }
2340 
2341   // fold (mul c1, c2) -> c1*c2
2342   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2343     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2344                                       N0.getNode(), N1.getNode());
2345 
2346   // canonicalize constant to RHS (vector doesn't have to splat)
2347   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2348      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2349     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2350   // fold (mul x, 0) -> 0
2351   if (N1IsConst && ConstValue1 == 0)
2352     return N1;
2353   // We require a splat of the entire scalar bit width for non-contiguous
2354   // bit patterns.
2355   bool IsFullSplat =
2356     ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2357   // fold (mul x, 1) -> x
2358   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
2359     return N0;
2360 
2361   if (SDValue NewSel = foldBinOpIntoSelect(N))
2362     return NewSel;
2363 
2364   // fold (mul x, -1) -> 0-x
2365   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2366     SDLoc DL(N);
2367     return DAG.getNode(ISD::SUB, DL, VT,
2368                        DAG.getConstant(0, DL, VT), N0);
2369   }
2370   // fold (mul x, (1 << c)) -> x << c
2371   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2372       IsFullSplat) {
2373     SDLoc DL(N);
2374     return DAG.getNode(ISD::SHL, DL, VT, N0,
2375                        DAG.getConstant(ConstValue1.logBase2(), DL,
2376                                        getShiftAmountTy(N0.getValueType())));
2377   }
2378   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2379   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2380       IsFullSplat) {
2381     unsigned Log2Val = (-ConstValue1).logBase2();
2382     SDLoc DL(N);
2383     // FIXME: If the input is something that is easily negated (e.g. a
2384     // single-use add), we should put the negate there.
2385     return DAG.getNode(ISD::SUB, DL, VT,
2386                        DAG.getConstant(0, DL, VT),
2387                        DAG.getNode(ISD::SHL, DL, VT, N0,
2388                             DAG.getConstant(Log2Val, DL,
2389                                       getShiftAmountTy(N0.getValueType()))));
2390   }
2391 
2392   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2393   if (N0.getOpcode() == ISD::SHL &&
2394       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2395       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2396     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2397     if (isConstantOrConstantVector(C3))
2398       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2399   }
2400 
2401   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2402   // use.
2403   {
2404     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2405 
2406     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2407     if (N0.getOpcode() == ISD::SHL &&
2408         isConstantOrConstantVector(N0.getOperand(1)) &&
2409         N0.getNode()->hasOneUse()) {
2410       Sh = N0; Y = N1;
2411     } else if (N1.getOpcode() == ISD::SHL &&
2412                isConstantOrConstantVector(N1.getOperand(1)) &&
2413                N1.getNode()->hasOneUse()) {
2414       Sh = N1; Y = N0;
2415     }
2416 
2417     if (Sh.getNode()) {
2418       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2419       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2420     }
2421   }
2422 
2423   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2424   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2425       N0.getOpcode() == ISD::ADD &&
2426       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2427       isMulAddWithConstProfitable(N, N0, N1))
2428       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2429                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2430                                      N0.getOperand(0), N1),
2431                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2432                                      N0.getOperand(1), N1));
2433 
2434   // reassociate mul
2435   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2436     return RMUL;
2437 
2438   return SDValue();
2439 }
2440 
2441 /// Return true if divmod libcall is available.
2442 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2443                                      const TargetLowering &TLI) {
2444   RTLIB::Libcall LC;
2445   EVT NodeType = Node->getValueType(0);
2446   if (!NodeType.isSimple())
2447     return false;
2448   switch (NodeType.getSimpleVT().SimpleTy) {
2449   default: return false; // No libcall for vector types.
2450   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2451   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2452   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2453   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2454   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2455   }
2456 
2457   return TLI.getLibcallName(LC) != nullptr;
2458 }
2459 
2460 /// Issue divrem if both quotient and remainder are needed.
2461 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2462   if (Node->use_empty())
2463     return SDValue(); // This is a dead node, leave it alone.
2464 
2465   unsigned Opcode = Node->getOpcode();
2466   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2467   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2468 
2469   // DivMod lib calls can still work on non-legal types if using lib-calls.
2470   EVT VT = Node->getValueType(0);
2471   if (VT.isVector() || !VT.isInteger())
2472     return SDValue();
2473 
2474   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2475     return SDValue();
2476 
2477   // If DIVREM is going to get expanded into a libcall,
2478   // but there is no libcall available, then don't combine.
2479   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2480       !isDivRemLibcallAvailable(Node, isSigned, TLI))
2481     return SDValue();
2482 
2483   // If div is legal, it's better to do the normal expansion
2484   unsigned OtherOpcode = 0;
2485   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2486     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2487     if (TLI.isOperationLegalOrCustom(Opcode, VT))
2488       return SDValue();
2489   } else {
2490     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2491     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2492       return SDValue();
2493   }
2494 
2495   SDValue Op0 = Node->getOperand(0);
2496   SDValue Op1 = Node->getOperand(1);
2497   SDValue combined;
2498   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2499          UE = Op0.getNode()->use_end(); UI != UE;) {
2500     SDNode *User = *UI++;
2501     if (User == Node || User->use_empty())
2502       continue;
2503     // Convert the other matching node(s), too;
2504     // otherwise, the DIVREM may get target-legalized into something
2505     // target-specific that we won't be able to recognize.
2506     unsigned UserOpc = User->getOpcode();
2507     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2508         User->getOperand(0) == Op0 &&
2509         User->getOperand(1) == Op1) {
2510       if (!combined) {
2511         if (UserOpc == OtherOpcode) {
2512           SDVTList VTs = DAG.getVTList(VT, VT);
2513           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2514         } else if (UserOpc == DivRemOpc) {
2515           combined = SDValue(User, 0);
2516         } else {
2517           assert(UserOpc == Opcode);
2518           continue;
2519         }
2520       }
2521       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2522         CombineTo(User, combined);
2523       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2524         CombineTo(User, combined.getValue(1));
2525     }
2526   }
2527   return combined;
2528 }
2529 
2530 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2531   SDValue N0 = N->getOperand(0);
2532   SDValue N1 = N->getOperand(1);
2533   EVT VT = N->getValueType(0);
2534   SDLoc DL(N);
2535 
2536   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2537     return DAG.getUNDEF(VT);
2538 
2539   // undef / X -> 0
2540   // undef % X -> 0
2541   if (N0.isUndef())
2542     return DAG.getConstant(0, DL, VT);
2543 
2544   return SDValue();
2545 }
2546 
2547 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2548   SDValue N0 = N->getOperand(0);
2549   SDValue N1 = N->getOperand(1);
2550   EVT VT = N->getValueType(0);
2551 
2552   // fold vector ops
2553   if (VT.isVector())
2554     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2555       return FoldedVOp;
2556 
2557   SDLoc DL(N);
2558 
2559   // fold (sdiv c1, c2) -> c1/c2
2560   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2561   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2562   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2563     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2564   // fold (sdiv X, 1) -> X
2565   if (N1C && N1C->isOne())
2566     return N0;
2567   // fold (sdiv X, -1) -> 0-X
2568   if (N1C && N1C->isAllOnesValue())
2569     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2570 
2571   if (SDValue V = simplifyDivRem(N, DAG))
2572     return V;
2573 
2574   if (SDValue NewSel = foldBinOpIntoSelect(N))
2575     return NewSel;
2576 
2577   // If we know the sign bits of both operands are zero, strength reduce to a
2578   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
2579   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2580     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2581 
2582   // fold (sdiv X, pow2) -> simple ops after legalize
2583   // FIXME: We check for the exact bit here because the generic lowering gives
2584   // better results in that case. The target-specific lowering should learn how
2585   // to handle exact sdivs efficiently.
2586   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2587       !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
2588       (N1C->getAPIntValue().isPowerOf2() ||
2589        (-N1C->getAPIntValue()).isPowerOf2())) {
2590     // Target-specific implementation of sdiv x, pow2.
2591     if (SDValue Res = BuildSDIVPow2(N))
2592       return Res;
2593 
2594     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2595 
2596     // Splat the sign bit into the register
2597     SDValue SGN =
2598         DAG.getNode(ISD::SRA, DL, VT, N0,
2599                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2600                                     getShiftAmountTy(N0.getValueType())));
2601     AddToWorklist(SGN.getNode());
2602 
2603     // Add (N0 < 0) ? abs2 - 1 : 0;
2604     SDValue SRL =
2605         DAG.getNode(ISD::SRL, DL, VT, SGN,
2606                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2607                                     getShiftAmountTy(SGN.getValueType())));
2608     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2609     AddToWorklist(SRL.getNode());
2610     AddToWorklist(ADD.getNode());    // Divide by pow2
2611     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2612                   DAG.getConstant(lg2, DL,
2613                                   getShiftAmountTy(ADD.getValueType())));
2614 
2615     // If we're dividing by a positive value, we're done.  Otherwise, we must
2616     // negate the result.
2617     if (N1C->getAPIntValue().isNonNegative())
2618       return SRA;
2619 
2620     AddToWorklist(SRA.getNode());
2621     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2622   }
2623 
2624   // If integer divide is expensive and we satisfy the requirements, emit an
2625   // alternate sequence.  Targets may check function attributes for size/speed
2626   // trade-offs.
2627   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2628   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2629     if (SDValue Op = BuildSDIV(N))
2630       return Op;
2631 
2632   // sdiv, srem -> sdivrem
2633   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2634   // true.  Otherwise, we break the simplification logic in visitREM().
2635   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2636     if (SDValue DivRem = useDivRem(N))
2637         return DivRem;
2638 
2639   return SDValue();
2640 }
2641 
2642 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2643   SDValue N0 = N->getOperand(0);
2644   SDValue N1 = N->getOperand(1);
2645   EVT VT = N->getValueType(0);
2646 
2647   // fold vector ops
2648   if (VT.isVector())
2649     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2650       return FoldedVOp;
2651 
2652   SDLoc DL(N);
2653 
2654   // fold (udiv c1, c2) -> c1/c2
2655   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2656   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2657   if (N0C && N1C)
2658     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2659                                                     N0C, N1C))
2660       return Folded;
2661 
2662   if (SDValue V = simplifyDivRem(N, DAG))
2663     return V;
2664 
2665   if (SDValue NewSel = foldBinOpIntoSelect(N))
2666     return NewSel;
2667 
2668   // fold (udiv x, (1 << c)) -> x >>u c
2669   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2670       DAG.isKnownToBeAPowerOfTwo(N1)) {
2671     SDValue LogBase2 = BuildLogBase2(N1, DL);
2672     AddToWorklist(LogBase2.getNode());
2673 
2674     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2675     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2676     AddToWorklist(Trunc.getNode());
2677     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2678   }
2679 
2680   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2681   if (N1.getOpcode() == ISD::SHL) {
2682     SDValue N10 = N1.getOperand(0);
2683     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2684         DAG.isKnownToBeAPowerOfTwo(N10)) {
2685       SDValue LogBase2 = BuildLogBase2(N10, DL);
2686       AddToWorklist(LogBase2.getNode());
2687 
2688       EVT ADDVT = N1.getOperand(1).getValueType();
2689       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2690       AddToWorklist(Trunc.getNode());
2691       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2692       AddToWorklist(Add.getNode());
2693       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2694     }
2695   }
2696 
2697   // fold (udiv x, c) -> alternate
2698   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2699   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2700     if (SDValue Op = BuildUDIV(N))
2701       return Op;
2702 
2703   // sdiv, srem -> sdivrem
2704   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2705   // true.  Otherwise, we break the simplification logic in visitREM().
2706   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2707     if (SDValue DivRem = useDivRem(N))
2708         return DivRem;
2709 
2710   return SDValue();
2711 }
2712 
2713 // handles ISD::SREM and ISD::UREM
2714 SDValue DAGCombiner::visitREM(SDNode *N) {
2715   unsigned Opcode = N->getOpcode();
2716   SDValue N0 = N->getOperand(0);
2717   SDValue N1 = N->getOperand(1);
2718   EVT VT = N->getValueType(0);
2719   bool isSigned = (Opcode == ISD::SREM);
2720   SDLoc DL(N);
2721 
2722   // fold (rem c1, c2) -> c1%c2
2723   ConstantSDNode *N0C = isConstOrConstSplat(N0);
2724   ConstantSDNode *N1C = isConstOrConstSplat(N1);
2725   if (N0C && N1C)
2726     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2727       return Folded;
2728 
2729   if (SDValue V = simplifyDivRem(N, DAG))
2730     return V;
2731 
2732   if (SDValue NewSel = foldBinOpIntoSelect(N))
2733     return NewSel;
2734 
2735   if (isSigned) {
2736     // If we know the sign bits of both operands are zero, strength reduce to a
2737     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2738     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2739       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2740   } else {
2741     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
2742     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
2743       // fold (urem x, pow2) -> (and x, pow2-1)
2744       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2745       AddToWorklist(Add.getNode());
2746       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2747     }
2748     if (N1.getOpcode() == ISD::SHL &&
2749         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
2750       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2751       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
2752       AddToWorklist(Add.getNode());
2753       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2754     }
2755   }
2756 
2757   AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2758 
2759   // If X/C can be simplified by the division-by-constant logic, lower
2760   // X%C to the equivalent of X-X/C*C.
2761   // To avoid mangling nodes, this simplification requires that the combine()
2762   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
2763   // against this by skipping the simplification if isIntDivCheap().  When
2764   // div is not cheap, combine will not return a DIVREM.  Regardless,
2765   // checking cheapness here makes sense since the simplification results in
2766   // fatter code.
2767   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
2768     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2769     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
2770     AddToWorklist(Div.getNode());
2771     SDValue OptimizedDiv = combine(Div.getNode());
2772     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2773       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
2774              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
2775       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
2776       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
2777       AddToWorklist(Mul.getNode());
2778       return Sub;
2779     }
2780   }
2781 
2782   // sdiv, srem -> sdivrem
2783   if (SDValue DivRem = useDivRem(N))
2784     return DivRem.getValue(1);
2785 
2786   return SDValue();
2787 }
2788 
2789 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2790   SDValue N0 = N->getOperand(0);
2791   SDValue N1 = N->getOperand(1);
2792   EVT VT = N->getValueType(0);
2793   SDLoc DL(N);
2794 
2795   // fold (mulhs x, 0) -> 0
2796   if (isNullConstant(N1))
2797     return N1;
2798   // fold (mulhs x, 1) -> (sra x, size(x)-1)
2799   if (isOneConstant(N1)) {
2800     SDLoc DL(N);
2801     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2802                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
2803                                        getShiftAmountTy(N0.getValueType())));
2804   }
2805   // fold (mulhs x, undef) -> 0
2806   if (N0.isUndef() || N1.isUndef())
2807     return DAG.getConstant(0, SDLoc(N), VT);
2808 
2809   // If the type twice as wide is legal, transform the mulhs to a wider multiply
2810   // plus a shift.
2811   if (VT.isSimple() && !VT.isVector()) {
2812     MVT Simple = VT.getSimpleVT();
2813     unsigned SimpleSize = Simple.getSizeInBits();
2814     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2815     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2816       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2817       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2818       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2819       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2820             DAG.getConstant(SimpleSize, DL,
2821                             getShiftAmountTy(N1.getValueType())));
2822       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2823     }
2824   }
2825 
2826   return SDValue();
2827 }
2828 
2829 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2830   SDValue N0 = N->getOperand(0);
2831   SDValue N1 = N->getOperand(1);
2832   EVT VT = N->getValueType(0);
2833   SDLoc DL(N);
2834 
2835   // fold (mulhu x, 0) -> 0
2836   if (isNullConstant(N1))
2837     return N1;
2838   // fold (mulhu x, 1) -> 0
2839   if (isOneConstant(N1))
2840     return DAG.getConstant(0, DL, N0.getValueType());
2841   // fold (mulhu x, undef) -> 0
2842   if (N0.isUndef() || N1.isUndef())
2843     return DAG.getConstant(0, DL, VT);
2844 
2845   // If the type twice as wide is legal, transform the mulhu to a wider multiply
2846   // plus a shift.
2847   if (VT.isSimple() && !VT.isVector()) {
2848     MVT Simple = VT.getSimpleVT();
2849     unsigned SimpleSize = Simple.getSizeInBits();
2850     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2851     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2852       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2853       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2854       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2855       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2856             DAG.getConstant(SimpleSize, DL,
2857                             getShiftAmountTy(N1.getValueType())));
2858       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2859     }
2860   }
2861 
2862   return SDValue();
2863 }
2864 
2865 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
2866 /// give the opcodes for the two computations that are being performed. Return
2867 /// true if a simplification was made.
2868 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2869                                                 unsigned HiOp) {
2870   // If the high half is not needed, just compute the low half.
2871   bool HiExists = N->hasAnyUseOfValue(1);
2872   if (!HiExists &&
2873       (!LegalOperations ||
2874        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
2875     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2876     return CombineTo(N, Res, Res);
2877   }
2878 
2879   // If the low half is not needed, just compute the high half.
2880   bool LoExists = N->hasAnyUseOfValue(0);
2881   if (!LoExists &&
2882       (!LegalOperations ||
2883        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2884     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2885     return CombineTo(N, Res, Res);
2886   }
2887 
2888   // If both halves are used, return as it is.
2889   if (LoExists && HiExists)
2890     return SDValue();
2891 
2892   // If the two computed results can be simplified separately, separate them.
2893   if (LoExists) {
2894     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2895     AddToWorklist(Lo.getNode());
2896     SDValue LoOpt = combine(Lo.getNode());
2897     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2898         (!LegalOperations ||
2899          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2900       return CombineTo(N, LoOpt, LoOpt);
2901   }
2902 
2903   if (HiExists) {
2904     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2905     AddToWorklist(Hi.getNode());
2906     SDValue HiOpt = combine(Hi.getNode());
2907     if (HiOpt.getNode() && HiOpt != Hi &&
2908         (!LegalOperations ||
2909          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2910       return CombineTo(N, HiOpt, HiOpt);
2911   }
2912 
2913   return SDValue();
2914 }
2915 
2916 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2917   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
2918     return Res;
2919 
2920   EVT VT = N->getValueType(0);
2921   SDLoc DL(N);
2922 
2923   // If the type is twice as wide is legal, transform the mulhu to a wider
2924   // multiply plus a shift.
2925   if (VT.isSimple() && !VT.isVector()) {
2926     MVT Simple = VT.getSimpleVT();
2927     unsigned SimpleSize = Simple.getSizeInBits();
2928     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2929     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2930       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2931       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2932       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2933       // Compute the high part as N1.
2934       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2935             DAG.getConstant(SimpleSize, DL,
2936                             getShiftAmountTy(Lo.getValueType())));
2937       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2938       // Compute the low part as N0.
2939       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2940       return CombineTo(N, Lo, Hi);
2941     }
2942   }
2943 
2944   return SDValue();
2945 }
2946 
2947 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2948   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
2949     return Res;
2950 
2951   EVT VT = N->getValueType(0);
2952   SDLoc DL(N);
2953 
2954   // If the type is twice as wide is legal, transform the mulhu to a wider
2955   // multiply plus a shift.
2956   if (VT.isSimple() && !VT.isVector()) {
2957     MVT Simple = VT.getSimpleVT();
2958     unsigned SimpleSize = Simple.getSizeInBits();
2959     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2960     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2961       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2962       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2963       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2964       // Compute the high part as N1.
2965       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2966             DAG.getConstant(SimpleSize, DL,
2967                             getShiftAmountTy(Lo.getValueType())));
2968       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2969       // Compute the low part as N0.
2970       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2971       return CombineTo(N, Lo, Hi);
2972     }
2973   }
2974 
2975   return SDValue();
2976 }
2977 
2978 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2979   // (smulo x, 2) -> (saddo x, x)
2980   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2981     if (C2->getAPIntValue() == 2)
2982       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2983                          N->getOperand(0), N->getOperand(0));
2984 
2985   return SDValue();
2986 }
2987 
2988 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2989   // (umulo x, 2) -> (uaddo x, x)
2990   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2991     if (C2->getAPIntValue() == 2)
2992       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2993                          N->getOperand(0), N->getOperand(0));
2994 
2995   return SDValue();
2996 }
2997 
2998 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
2999   SDValue N0 = N->getOperand(0);
3000   SDValue N1 = N->getOperand(1);
3001   EVT VT = N0.getValueType();
3002 
3003   // fold vector ops
3004   if (VT.isVector())
3005     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3006       return FoldedVOp;
3007 
3008   // fold (add c1, c2) -> c1+c2
3009   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3010   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3011   if (N0C && N1C)
3012     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3013 
3014   // canonicalize constant to RHS
3015   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3016      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3017     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3018 
3019   return SDValue();
3020 }
3021 
3022 /// If this is a binary operator with two operands of the same opcode, try to
3023 /// simplify it.
3024 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3025   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3026   EVT VT = N0.getValueType();
3027   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3028 
3029   // Bail early if none of these transforms apply.
3030   if (N0.getNumOperands() == 0) return SDValue();
3031 
3032   // For each of OP in AND/OR/XOR:
3033   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3034   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3035   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3036   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3037   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3038   //
3039   // do not sink logical op inside of a vector extend, since it may combine
3040   // into a vsetcc.
3041   EVT Op0VT = N0.getOperand(0).getValueType();
3042   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3043        N0.getOpcode() == ISD::SIGN_EXTEND ||
3044        N0.getOpcode() == ISD::BSWAP ||
3045        // Avoid infinite looping with PromoteIntBinOp.
3046        (N0.getOpcode() == ISD::ANY_EXTEND &&
3047         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3048        (N0.getOpcode() == ISD::TRUNCATE &&
3049         (!TLI.isZExtFree(VT, Op0VT) ||
3050          !TLI.isTruncateFree(Op0VT, VT)) &&
3051         TLI.isTypeLegal(Op0VT))) &&
3052       !VT.isVector() &&
3053       Op0VT == N1.getOperand(0).getValueType() &&
3054       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3055     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3056                                  N0.getOperand(0).getValueType(),
3057                                  N0.getOperand(0), N1.getOperand(0));
3058     AddToWorklist(ORNode.getNode());
3059     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3060   }
3061 
3062   // For each of OP in SHL/SRL/SRA/AND...
3063   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3064   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3065   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3066   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3067        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3068       N0.getOperand(1) == N1.getOperand(1)) {
3069     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3070                                  N0.getOperand(0).getValueType(),
3071                                  N0.getOperand(0), N1.getOperand(0));
3072     AddToWorklist(ORNode.getNode());
3073     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3074                        ORNode, N0.getOperand(1));
3075   }
3076 
3077   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3078   // Only perform this optimization up until type legalization, before
3079   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3080   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3081   // we don't want to undo this promotion.
3082   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3083   // on scalars.
3084   if ((N0.getOpcode() == ISD::BITCAST ||
3085        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3086        Level <= AfterLegalizeTypes) {
3087     SDValue In0 = N0.getOperand(0);
3088     SDValue In1 = N1.getOperand(0);
3089     EVT In0Ty = In0.getValueType();
3090     EVT In1Ty = In1.getValueType();
3091     SDLoc DL(N);
3092     // If both incoming values are integers, and the original types are the
3093     // same.
3094     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3095       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3096       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3097       AddToWorklist(Op.getNode());
3098       return BC;
3099     }
3100   }
3101 
3102   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3103   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3104   // If both shuffles use the same mask, and both shuffle within a single
3105   // vector, then it is worthwhile to move the swizzle after the operation.
3106   // The type-legalizer generates this pattern when loading illegal
3107   // vector types from memory. In many cases this allows additional shuffle
3108   // optimizations.
3109   // There are other cases where moving the shuffle after the xor/and/or
3110   // is profitable even if shuffles don't perform a swizzle.
3111   // If both shuffles use the same mask, and both shuffles have the same first
3112   // or second operand, then it might still be profitable to move the shuffle
3113   // after the xor/and/or operation.
3114   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3115     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3116     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3117 
3118     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3119            "Inputs to shuffles are not the same type");
3120 
3121     // Check that both shuffles use the same mask. The masks are known to be of
3122     // the same length because the result vector type is the same.
3123     // Check also that shuffles have only one use to avoid introducing extra
3124     // instructions.
3125     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3126         SVN0->getMask().equals(SVN1->getMask())) {
3127       SDValue ShOp = N0->getOperand(1);
3128 
3129       // Don't try to fold this node if it requires introducing a
3130       // build vector of all zeros that might be illegal at this stage.
3131       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3132         if (!LegalTypes)
3133           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3134         else
3135           ShOp = SDValue();
3136       }
3137 
3138       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3139       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
3140       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3141       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3142         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3143                                       N0->getOperand(0), N1->getOperand(0));
3144         AddToWorklist(NewNode.getNode());
3145         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3146                                     SVN0->getMask());
3147       }
3148 
3149       // Don't try to fold this node if it requires introducing a
3150       // build vector of all zeros that might be illegal at this stage.
3151       ShOp = N0->getOperand(0);
3152       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3153         if (!LegalTypes)
3154           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3155         else
3156           ShOp = SDValue();
3157       }
3158 
3159       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3160       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
3161       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3162       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3163         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3164                                       N0->getOperand(1), N1->getOperand(1));
3165         AddToWorklist(NewNode.getNode());
3166         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3167                                     SVN0->getMask());
3168       }
3169     }
3170   }
3171 
3172   return SDValue();
3173 }
3174 
3175 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3176 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3177                                        const SDLoc &DL) {
3178   SDValue LL, LR, RL, RR, N0CC, N1CC;
3179   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3180       !isSetCCEquivalent(N1, RL, RR, N1CC))
3181     return SDValue();
3182 
3183   assert(N0.getValueType() == N1.getValueType() &&
3184          "Unexpected operand types for bitwise logic op");
3185   assert(LL.getValueType() == LR.getValueType() &&
3186          RL.getValueType() == RR.getValueType() &&
3187          "Unexpected operand types for setcc");
3188 
3189   // If we're here post-legalization or the logic op type is not i1, the logic
3190   // op type must match a setcc result type. Also, all folds require new
3191   // operations on the left and right operands, so those types must match.
3192   EVT VT = N0.getValueType();
3193   EVT OpVT = LL.getValueType();
3194   if (LegalOperations || VT != MVT::i1)
3195     if (VT != getSetCCResultType(OpVT))
3196       return SDValue();
3197   if (OpVT != RL.getValueType())
3198     return SDValue();
3199 
3200   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3201   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3202   bool IsInteger = OpVT.isInteger();
3203   if (LR == RR && CC0 == CC1 && IsInteger) {
3204     bool IsZero = isNullConstantOrNullSplatConstant(LR);
3205     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3206 
3207     // All bits clear?
3208     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3209     // All sign bits clear?
3210     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3211     // Any bits set?
3212     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3213     // Any sign bits set?
3214     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3215 
3216     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
3217     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3218     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
3219     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
3220     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3221       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3222       AddToWorklist(Or.getNode());
3223       return DAG.getSetCC(DL, VT, Or, LR, CC1);
3224     }
3225 
3226     // All bits set?
3227     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3228     // All sign bits set?
3229     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3230     // Any bits clear?
3231     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3232     // Any sign bits clear?
3233     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3234 
3235     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3236     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
3237     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3238     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
3239     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3240       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3241       AddToWorklist(And.getNode());
3242       return DAG.getSetCC(DL, VT, And, LR, CC1);
3243     }
3244   }
3245 
3246   // TODO: What is the 'or' equivalent of this fold?
3247   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3248   if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
3249       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3250        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3251     SDValue One = DAG.getConstant(1, DL, OpVT);
3252     SDValue Two = DAG.getConstant(2, DL, OpVT);
3253     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3254     AddToWorklist(Add.getNode());
3255     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3256   }
3257 
3258   // Canonicalize equivalent operands to LL == RL.
3259   if (LL == RR && LR == RL) {
3260     CC1 = ISD::getSetCCSwappedOperands(CC1);
3261     std::swap(RL, RR);
3262   }
3263 
3264   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3265   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3266   if (LL == RL && LR == RR) {
3267     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3268                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3269     if (NewCC != ISD::SETCC_INVALID &&
3270         (!LegalOperations ||
3271          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3272           TLI.isOperationLegal(ISD::SETCC, OpVT))))
3273       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3274   }
3275 
3276   return SDValue();
3277 }
3278 
3279 /// This contains all DAGCombine rules which reduce two values combined by
3280 /// an And operation to a single value. This makes them reusable in the context
3281 /// of visitSELECT(). Rules involving constants are not included as
3282 /// visitSELECT() already handles those cases.
3283 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3284   EVT VT = N1.getValueType();
3285   SDLoc DL(N);
3286 
3287   // fold (and x, undef) -> 0
3288   if (N0.isUndef() || N1.isUndef())
3289     return DAG.getConstant(0, DL, VT);
3290 
3291   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3292     return V;
3293 
3294   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3295       VT.getSizeInBits() <= 64) {
3296     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3297       APInt ADDC = ADDI->getAPIntValue();
3298       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3299         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3300         // immediate for an add, but it is legal if its top c2 bits are set,
3301         // transform the ADD so the immediate doesn't need to be materialized
3302         // in a register.
3303         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3304           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3305                                              SRLI->getZExtValue());
3306           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3307             ADDC |= Mask;
3308             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3309               SDLoc DL0(N0);
3310               SDValue NewAdd =
3311                 DAG.getNode(ISD::ADD, DL0, VT,
3312                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3313               CombineTo(N0.getNode(), NewAdd);
3314               // Return N so it doesn't get rechecked!
3315               return SDValue(N, 0);
3316             }
3317           }
3318         }
3319       }
3320     }
3321   }
3322 
3323   // Reduce bit extract of low half of an integer to the narrower type.
3324   // (and (srl i64:x, K), KMask) ->
3325   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3326   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3327     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3328       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3329         unsigned Size = VT.getSizeInBits();
3330         const APInt &AndMask = CAnd->getAPIntValue();
3331         unsigned ShiftBits = CShift->getZExtValue();
3332 
3333         // Bail out, this node will probably disappear anyway.
3334         if (ShiftBits == 0)
3335           return SDValue();
3336 
3337         unsigned MaskBits = AndMask.countTrailingOnes();
3338         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3339 
3340         if (AndMask.isMask() &&
3341             // Required bits must not span the two halves of the integer and
3342             // must fit in the half size type.
3343             (ShiftBits + MaskBits <= Size / 2) &&
3344             TLI.isNarrowingProfitable(VT, HalfVT) &&
3345             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3346             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3347             TLI.isTruncateFree(VT, HalfVT) &&
3348             TLI.isZExtFree(HalfVT, VT)) {
3349           // The isNarrowingProfitable is to avoid regressions on PPC and
3350           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3351           // on downstream users of this. Those patterns could probably be
3352           // extended to handle extensions mixed in.
3353 
3354           SDValue SL(N0);
3355           assert(MaskBits <= Size);
3356 
3357           // Extracting the highest bit of the low half.
3358           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3359           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3360                                       N0.getOperand(0));
3361 
3362           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3363           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3364           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3365           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3366           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3367         }
3368       }
3369     }
3370   }
3371 
3372   return SDValue();
3373 }
3374 
3375 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3376                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3377                                    bool &NarrowLoad) {
3378   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3379 
3380   if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
3381     return false;
3382 
3383   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3384   LoadedVT = LoadN->getMemoryVT();
3385 
3386   if (ExtVT == LoadedVT &&
3387       (!LegalOperations ||
3388        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3389     // ZEXTLOAD will match without needing to change the size of the value being
3390     // loaded.
3391     NarrowLoad = false;
3392     return true;
3393   }
3394 
3395   // Do not change the width of a volatile load.
3396   if (LoadN->isVolatile())
3397     return false;
3398 
3399   // Do not generate loads of non-round integer types since these can
3400   // be expensive (and would be wrong if the type is not byte sized).
3401   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3402     return false;
3403 
3404   if (LegalOperations &&
3405       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3406     return false;
3407 
3408   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3409     return false;
3410 
3411   NarrowLoad = true;
3412   return true;
3413 }
3414 
3415 SDValue DAGCombiner::visitAND(SDNode *N) {
3416   SDValue N0 = N->getOperand(0);
3417   SDValue N1 = N->getOperand(1);
3418   EVT VT = N1.getValueType();
3419 
3420   // x & x --> x
3421   if (N0 == N1)
3422     return N0;
3423 
3424   // fold vector ops
3425   if (VT.isVector()) {
3426     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3427       return FoldedVOp;
3428 
3429     // fold (and x, 0) -> 0, vector edition
3430     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3431       // do not return N0, because undef node may exist in N0
3432       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3433                              SDLoc(N), N0.getValueType());
3434     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3435       // do not return N1, because undef node may exist in N1
3436       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3437                              SDLoc(N), N1.getValueType());
3438 
3439     // fold (and x, -1) -> x, vector edition
3440     if (ISD::isBuildVectorAllOnes(N0.getNode()))
3441       return N1;
3442     if (ISD::isBuildVectorAllOnes(N1.getNode()))
3443       return N0;
3444   }
3445 
3446   // fold (and c1, c2) -> c1&c2
3447   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3448   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3449   if (N0C && N1C && !N1C->isOpaque())
3450     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3451   // canonicalize constant to RHS
3452   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3453      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3454     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3455   // fold (and x, -1) -> x
3456   if (isAllOnesConstant(N1))
3457     return N0;
3458   // if (and x, c) is known to be zero, return 0
3459   unsigned BitWidth = VT.getScalarSizeInBits();
3460   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3461                                    APInt::getAllOnesValue(BitWidth)))
3462     return DAG.getConstant(0, SDLoc(N), VT);
3463 
3464   if (SDValue NewSel = foldBinOpIntoSelect(N))
3465     return NewSel;
3466 
3467   // reassociate and
3468   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3469     return RAND;
3470   // fold (and (or x, C), D) -> D if (C & D) == D
3471   if (N1C && N0.getOpcode() == ISD::OR)
3472     if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3473       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
3474         return N1;
3475   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3476   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3477     SDValue N0Op0 = N0.getOperand(0);
3478     APInt Mask = ~N1C->getAPIntValue();
3479     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3480     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3481       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3482                                  N0.getValueType(), N0Op0);
3483 
3484       // Replace uses of the AND with uses of the Zero extend node.
3485       CombineTo(N, Zext);
3486 
3487       // We actually want to replace all uses of the any_extend with the
3488       // zero_extend, to avoid duplicating things.  This will later cause this
3489       // AND to be folded.
3490       CombineTo(N0.getNode(), Zext);
3491       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3492     }
3493   }
3494   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3495   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3496   // already be zero by virtue of the width of the base type of the load.
3497   //
3498   // the 'X' node here can either be nothing or an extract_vector_elt to catch
3499   // more cases.
3500   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3501        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3502        N0.getOperand(0).getOpcode() == ISD::LOAD &&
3503        N0.getOperand(0).getResNo() == 0) ||
3504       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3505     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3506                                          N0 : N0.getOperand(0) );
3507 
3508     // Get the constant (if applicable) the zero'th operand is being ANDed with.
3509     // This can be a pure constant or a vector splat, in which case we treat the
3510     // vector as a scalar and use the splat value.
3511     APInt Constant = APInt::getNullValue(1);
3512     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3513       Constant = C->getAPIntValue();
3514     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3515       APInt SplatValue, SplatUndef;
3516       unsigned SplatBitSize;
3517       bool HasAnyUndefs;
3518       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3519                                              SplatBitSize, HasAnyUndefs);
3520       if (IsSplat) {
3521         // Undef bits can contribute to a possible optimisation if set, so
3522         // set them.
3523         SplatValue |= SplatUndef;
3524 
3525         // The splat value may be something like "0x00FFFFFF", which means 0 for
3526         // the first vector value and FF for the rest, repeating. We need a mask
3527         // that will apply equally to all members of the vector, so AND all the
3528         // lanes of the constant together.
3529         EVT VT = Vector->getValueType(0);
3530         unsigned BitWidth = VT.getScalarSizeInBits();
3531 
3532         // If the splat value has been compressed to a bitlength lower
3533         // than the size of the vector lane, we need to re-expand it to
3534         // the lane size.
3535         if (BitWidth > SplatBitSize)
3536           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3537                SplatBitSize < BitWidth;
3538                SplatBitSize = SplatBitSize * 2)
3539             SplatValue |= SplatValue.shl(SplatBitSize);
3540 
3541         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3542         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3543         if (SplatBitSize % BitWidth == 0) {
3544           Constant = APInt::getAllOnesValue(BitWidth);
3545           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3546             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3547         }
3548       }
3549     }
3550 
3551     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3552     // actually legal and isn't going to get expanded, else this is a false
3553     // optimisation.
3554     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3555                                                     Load->getValueType(0),
3556                                                     Load->getMemoryVT());
3557 
3558     // Resize the constant to the same size as the original memory access before
3559     // extension. If it is still the AllOnesValue then this AND is completely
3560     // unneeded.
3561     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3562 
3563     bool B;
3564     switch (Load->getExtensionType()) {
3565     default: B = false; break;
3566     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3567     case ISD::ZEXTLOAD:
3568     case ISD::NON_EXTLOAD: B = true; break;
3569     }
3570 
3571     if (B && Constant.isAllOnesValue()) {
3572       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3573       // preserve semantics once we get rid of the AND.
3574       SDValue NewLoad(Load, 0);
3575 
3576       // Fold the AND away. NewLoad may get replaced immediately.
3577       CombineTo(N, NewLoad);
3578 
3579       if (Load->getExtensionType() == ISD::EXTLOAD) {
3580         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3581                               Load->getValueType(0), SDLoc(Load),
3582                               Load->getChain(), Load->getBasePtr(),
3583                               Load->getOffset(), Load->getMemoryVT(),
3584                               Load->getMemOperand());
3585         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3586         if (Load->getNumValues() == 3) {
3587           // PRE/POST_INC loads have 3 values.
3588           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3589                            NewLoad.getValue(2) };
3590           CombineTo(Load, To, 3, true);
3591         } else {
3592           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3593         }
3594       }
3595 
3596       return SDValue(N, 0); // Return N so it doesn't get rechecked!
3597     }
3598   }
3599 
3600   // fold (and (load x), 255) -> (zextload x, i8)
3601   // fold (and (extload x, i16), 255) -> (zextload x, i8)
3602   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3603   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3604                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
3605                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3606     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3607     LoadSDNode *LN0 = HasAnyExt
3608       ? cast<LoadSDNode>(N0.getOperand(0))
3609       : cast<LoadSDNode>(N0);
3610     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3611         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3612       auto NarrowLoad = false;
3613       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3614       EVT ExtVT, LoadedVT;
3615       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3616                            NarrowLoad)) {
3617         if (!NarrowLoad) {
3618           SDValue NewLoad =
3619             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3620                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
3621                            LN0->getMemOperand());
3622           AddToWorklist(N);
3623           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3624           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3625         } else {
3626           EVT PtrType = LN0->getOperand(1).getValueType();
3627 
3628           unsigned Alignment = LN0->getAlignment();
3629           SDValue NewPtr = LN0->getBasePtr();
3630 
3631           // For big endian targets, we need to add an offset to the pointer
3632           // to load the correct bytes.  For little endian systems, we merely
3633           // need to read fewer bytes from the same pointer.
3634           if (DAG.getDataLayout().isBigEndian()) {
3635             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3636             unsigned EVTStoreBytes = ExtVT.getStoreSize();
3637             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3638             SDLoc DL(LN0);
3639             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3640                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3641             Alignment = MinAlign(Alignment, PtrOff);
3642           }
3643 
3644           AddToWorklist(NewPtr.getNode());
3645 
3646           SDValue Load = DAG.getExtLoad(
3647               ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3648               LN0->getPointerInfo(), ExtVT, Alignment,
3649               LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3650           AddToWorklist(N);
3651           CombineTo(LN0, Load, Load.getValue(1));
3652           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3653         }
3654       }
3655     }
3656   }
3657 
3658   if (SDValue Combined = visitANDLike(N0, N1, N))
3659     return Combined;
3660 
3661   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
3662   if (N0.getOpcode() == N1.getOpcode())
3663     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3664       return Tmp;
3665 
3666   // Masking the negated extension of a boolean is just the zero-extended
3667   // boolean:
3668   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3669   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3670   //
3671   // Note: the SimplifyDemandedBits fold below can make an information-losing
3672   // transform, and then we have no way to find this better fold.
3673   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3674     ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3675     SDValue SubRHS = N0.getOperand(1);
3676     if (SubLHS && SubLHS->isNullValue()) {
3677       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3678           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3679         return SubRHS;
3680       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3681           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3682         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3683     }
3684   }
3685 
3686   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3687   // fold (and (sra)) -> (and (srl)) when possible.
3688   if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
3689     return SDValue(N, 0);
3690 
3691   // fold (zext_inreg (extload x)) -> (zextload x)
3692   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3693     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3694     EVT MemVT = LN0->getMemoryVT();
3695     // If we zero all the possible extended bits, then we can turn this into
3696     // a zextload if we are running before legalize or the operation is legal.
3697     unsigned BitWidth = N1.getScalarValueSizeInBits();
3698     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3699                            BitWidth - MemVT.getScalarSizeInBits())) &&
3700         ((!LegalOperations && !LN0->isVolatile()) ||
3701          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3702       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3703                                        LN0->getChain(), LN0->getBasePtr(),
3704                                        MemVT, LN0->getMemOperand());
3705       AddToWorklist(N);
3706       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3707       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3708     }
3709   }
3710   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3711   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3712       N0.hasOneUse()) {
3713     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3714     EVT MemVT = LN0->getMemoryVT();
3715     // If we zero all the possible extended bits, then we can turn this into
3716     // a zextload if we are running before legalize or the operation is legal.
3717     unsigned BitWidth = N1.getScalarValueSizeInBits();
3718     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3719                            BitWidth - MemVT.getScalarSizeInBits())) &&
3720         ((!LegalOperations && !LN0->isVolatile()) ||
3721          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3722       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3723                                        LN0->getChain(), LN0->getBasePtr(),
3724                                        MemVT, LN0->getMemOperand());
3725       AddToWorklist(N);
3726       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3727       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3728     }
3729   }
3730   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3731   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3732     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3733                                            N0.getOperand(1), false))
3734       return BSwap;
3735   }
3736 
3737   return SDValue();
3738 }
3739 
3740 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3741 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3742                                         bool DemandHighBits) {
3743   if (!LegalOperations)
3744     return SDValue();
3745 
3746   EVT VT = N->getValueType(0);
3747   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3748     return SDValue();
3749   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3750     return SDValue();
3751 
3752   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3753   bool LookPassAnd0 = false;
3754   bool LookPassAnd1 = false;
3755   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3756       std::swap(N0, N1);
3757   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3758       std::swap(N0, N1);
3759   if (N0.getOpcode() == ISD::AND) {
3760     if (!N0.getNode()->hasOneUse())
3761       return SDValue();
3762     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3763     if (!N01C || N01C->getZExtValue() != 0xFF00)
3764       return SDValue();
3765     N0 = N0.getOperand(0);
3766     LookPassAnd0 = true;
3767   }
3768 
3769   if (N1.getOpcode() == ISD::AND) {
3770     if (!N1.getNode()->hasOneUse())
3771       return SDValue();
3772     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3773     if (!N11C || N11C->getZExtValue() != 0xFF)
3774       return SDValue();
3775     N1 = N1.getOperand(0);
3776     LookPassAnd1 = true;
3777   }
3778 
3779   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3780     std::swap(N0, N1);
3781   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
3782     return SDValue();
3783   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
3784     return SDValue();
3785 
3786   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3787   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3788   if (!N01C || !N11C)
3789     return SDValue();
3790   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
3791     return SDValue();
3792 
3793   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
3794   SDValue N00 = N0->getOperand(0);
3795   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
3796     if (!N00.getNode()->hasOneUse())
3797       return SDValue();
3798     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
3799     if (!N001C || N001C->getZExtValue() != 0xFF)
3800       return SDValue();
3801     N00 = N00.getOperand(0);
3802     LookPassAnd0 = true;
3803   }
3804 
3805   SDValue N10 = N1->getOperand(0);
3806   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
3807     if (!N10.getNode()->hasOneUse())
3808       return SDValue();
3809     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
3810     if (!N101C || N101C->getZExtValue() != 0xFF00)
3811       return SDValue();
3812     N10 = N10.getOperand(0);
3813     LookPassAnd1 = true;
3814   }
3815 
3816   if (N00 != N10)
3817     return SDValue();
3818 
3819   // Make sure everything beyond the low halfword gets set to zero since the SRL
3820   // 16 will clear the top bits.
3821   unsigned OpSizeInBits = VT.getSizeInBits();
3822   if (DemandHighBits && OpSizeInBits > 16) {
3823     // If the left-shift isn't masked out then the only way this is a bswap is
3824     // if all bits beyond the low 8 are 0. In that case the entire pattern
3825     // reduces to a left shift anyway: leave it for other parts of the combiner.
3826     if (!LookPassAnd0)
3827       return SDValue();
3828 
3829     // However, if the right shift isn't masked out then it might be because
3830     // it's not needed. See if we can spot that too.
3831     if (!LookPassAnd1 &&
3832         !DAG.MaskedValueIsZero(
3833             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
3834       return SDValue();
3835   }
3836 
3837   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
3838   if (OpSizeInBits > 16) {
3839     SDLoc DL(N);
3840     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
3841                       DAG.getConstant(OpSizeInBits - 16, DL,
3842                                       getShiftAmountTy(VT)));
3843   }
3844   return Res;
3845 }
3846 
3847 /// Return true if the specified node is an element that makes up a 32-bit
3848 /// packed halfword byteswap.
3849 /// ((x & 0x000000ff) << 8) |
3850 /// ((x & 0x0000ff00) >> 8) |
3851 /// ((x & 0x00ff0000) << 8) |
3852 /// ((x & 0xff000000) >> 8)
3853 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3854   if (!N.getNode()->hasOneUse())
3855     return false;
3856 
3857   unsigned Opc = N.getOpcode();
3858   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3859     return false;
3860 
3861   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3862   if (!N1C)
3863     return false;
3864 
3865   unsigned Num;
3866   switch (N1C->getZExtValue()) {
3867   default:
3868     return false;
3869   case 0xFF:       Num = 0; break;
3870   case 0xFF00:     Num = 1; break;
3871   case 0xFF0000:   Num = 2; break;
3872   case 0xFF000000: Num = 3; break;
3873   }
3874 
3875   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3876   SDValue N0 = N.getOperand(0);
3877   if (Opc == ISD::AND) {
3878     if (Num == 0 || Num == 2) {
3879       // (x >> 8) & 0xff
3880       // (x >> 8) & 0xff0000
3881       if (N0.getOpcode() != ISD::SRL)
3882         return false;
3883       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3884       if (!C || C->getZExtValue() != 8)
3885         return false;
3886     } else {
3887       // (x << 8) & 0xff00
3888       // (x << 8) & 0xff000000
3889       if (N0.getOpcode() != ISD::SHL)
3890         return false;
3891       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3892       if (!C || C->getZExtValue() != 8)
3893         return false;
3894     }
3895   } else if (Opc == ISD::SHL) {
3896     // (x & 0xff) << 8
3897     // (x & 0xff0000) << 8
3898     if (Num != 0 && Num != 2)
3899       return false;
3900     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3901     if (!C || C->getZExtValue() != 8)
3902       return false;
3903   } else { // Opc == ISD::SRL
3904     // (x & 0xff00) >> 8
3905     // (x & 0xff000000) >> 8
3906     if (Num != 1 && Num != 3)
3907       return false;
3908     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3909     if (!C || C->getZExtValue() != 8)
3910       return false;
3911   }
3912 
3913   if (Parts[Num])
3914     return false;
3915 
3916   Parts[Num] = N0.getOperand(0).getNode();
3917   return true;
3918 }
3919 
3920 /// Match a 32-bit packed halfword bswap. That is
3921 /// ((x & 0x000000ff) << 8) |
3922 /// ((x & 0x0000ff00) >> 8) |
3923 /// ((x & 0x00ff0000) << 8) |
3924 /// ((x & 0xff000000) >> 8)
3925 /// => (rotl (bswap x), 16)
3926 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3927   if (!LegalOperations)
3928     return SDValue();
3929 
3930   EVT VT = N->getValueType(0);
3931   if (VT != MVT::i32)
3932     return SDValue();
3933   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3934     return SDValue();
3935 
3936   // Look for either
3937   // (or (or (and), (and)), (or (and), (and)))
3938   // (or (or (or (and), (and)), (and)), (and))
3939   if (N0.getOpcode() != ISD::OR)
3940     return SDValue();
3941   SDValue N00 = N0.getOperand(0);
3942   SDValue N01 = N0.getOperand(1);
3943   SDNode *Parts[4] = {};
3944 
3945   if (N1.getOpcode() == ISD::OR &&
3946       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3947     // (or (or (and), (and)), (or (and), (and)))
3948     SDValue N000 = N00.getOperand(0);
3949     if (!isBSwapHWordElement(N000, Parts))
3950       return SDValue();
3951 
3952     SDValue N001 = N00.getOperand(1);
3953     if (!isBSwapHWordElement(N001, Parts))
3954       return SDValue();
3955     SDValue N010 = N01.getOperand(0);
3956     if (!isBSwapHWordElement(N010, Parts))
3957       return SDValue();
3958     SDValue N011 = N01.getOperand(1);
3959     if (!isBSwapHWordElement(N011, Parts))
3960       return SDValue();
3961   } else {
3962     // (or (or (or (and), (and)), (and)), (and))
3963     if (!isBSwapHWordElement(N1, Parts))
3964       return SDValue();
3965     if (!isBSwapHWordElement(N01, Parts))
3966       return SDValue();
3967     if (N00.getOpcode() != ISD::OR)
3968       return SDValue();
3969     SDValue N000 = N00.getOperand(0);
3970     if (!isBSwapHWordElement(N000, Parts))
3971       return SDValue();
3972     SDValue N001 = N00.getOperand(1);
3973     if (!isBSwapHWordElement(N001, Parts))
3974       return SDValue();
3975   }
3976 
3977   // Make sure the parts are all coming from the same node.
3978   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3979     return SDValue();
3980 
3981   SDLoc DL(N);
3982   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
3983                               SDValue(Parts[0], 0));
3984 
3985   // Result of the bswap should be rotated by 16. If it's not legal, then
3986   // do  (x << 16) | (x >> 16).
3987   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
3988   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3989     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
3990   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3991     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
3992   return DAG.getNode(ISD::OR, DL, VT,
3993                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
3994                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
3995 }
3996 
3997 /// This contains all DAGCombine rules which reduce two values combined by
3998 /// an Or operation to a single value \see visitANDLike().
3999 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4000   EVT VT = N1.getValueType();
4001   SDLoc DL(N);
4002 
4003   // fold (or x, undef) -> -1
4004   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4005     return DAG.getAllOnesConstant(DL, VT);
4006 
4007   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4008     return V;
4009 
4010   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4011   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4012       // Don't increase # computations.
4013       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4014     // We can only do this xform if we know that bits from X that are set in C2
4015     // but not in C1 are already zero.  Likewise for Y.
4016     if (const ConstantSDNode *N0O1C =
4017         getAsNonOpaqueConstant(N0.getOperand(1))) {
4018       if (const ConstantSDNode *N1O1C =
4019           getAsNonOpaqueConstant(N1.getOperand(1))) {
4020         // We can only do this xform if we know that bits from X that are set in
4021         // C2 but not in C1 are already zero.  Likewise for Y.
4022         const APInt &LHSMask = N0O1C->getAPIntValue();
4023         const APInt &RHSMask = N1O1C->getAPIntValue();
4024 
4025         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4026             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4027           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4028                                   N0.getOperand(0), N1.getOperand(0));
4029           return DAG.getNode(ISD::AND, DL, VT, X,
4030                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4031         }
4032       }
4033     }
4034   }
4035 
4036   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4037   if (N0.getOpcode() == ISD::AND &&
4038       N1.getOpcode() == ISD::AND &&
4039       N0.getOperand(0) == N1.getOperand(0) &&
4040       // Don't increase # computations.
4041       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4042     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4043                             N0.getOperand(1), N1.getOperand(1));
4044     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4045   }
4046 
4047   return SDValue();
4048 }
4049 
4050 SDValue DAGCombiner::visitOR(SDNode *N) {
4051   SDValue N0 = N->getOperand(0);
4052   SDValue N1 = N->getOperand(1);
4053   EVT VT = N1.getValueType();
4054 
4055   // x | x --> x
4056   if (N0 == N1)
4057     return N0;
4058 
4059   // fold vector ops
4060   if (VT.isVector()) {
4061     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4062       return FoldedVOp;
4063 
4064     // fold (or x, 0) -> x, vector edition
4065     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4066       return N1;
4067     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4068       return N0;
4069 
4070     // fold (or x, -1) -> -1, vector edition
4071     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4072       // do not return N0, because undef node may exist in N0
4073       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4074     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4075       // do not return N1, because undef node may exist in N1
4076       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4077 
4078     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4079     // Do this only if the resulting shuffle is legal.
4080     if (isa<ShuffleVectorSDNode>(N0) &&
4081         isa<ShuffleVectorSDNode>(N1) &&
4082         // Avoid folding a node with illegal type.
4083         TLI.isTypeLegal(VT)) {
4084       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4085       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4086       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4087       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4088       // Ensure both shuffles have a zero input.
4089       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4090         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4091         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4092         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4093         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4094         bool CanFold = true;
4095         int NumElts = VT.getVectorNumElements();
4096         SmallVector<int, 4> Mask(NumElts);
4097 
4098         for (int i = 0; i != NumElts; ++i) {
4099           int M0 = SV0->getMaskElt(i);
4100           int M1 = SV1->getMaskElt(i);
4101 
4102           // Determine if either index is pointing to a zero vector.
4103           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4104           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4105 
4106           // If one element is zero and the otherside is undef, keep undef.
4107           // This also handles the case that both are undef.
4108           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4109             Mask[i] = -1;
4110             continue;
4111           }
4112 
4113           // Make sure only one of the elements is zero.
4114           if (M0Zero == M1Zero) {
4115             CanFold = false;
4116             break;
4117           }
4118 
4119           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4120 
4121           // We have a zero and non-zero element. If the non-zero came from
4122           // SV0 make the index a LHS index. If it came from SV1, make it
4123           // a RHS index. We need to mod by NumElts because we don't care
4124           // which operand it came from in the original shuffles.
4125           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4126         }
4127 
4128         if (CanFold) {
4129           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4130           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4131 
4132           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4133           if (!LegalMask) {
4134             std::swap(NewLHS, NewRHS);
4135             ShuffleVectorSDNode::commuteMask(Mask);
4136             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4137           }
4138 
4139           if (LegalMask)
4140             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4141         }
4142       }
4143     }
4144   }
4145 
4146   // fold (or c1, c2) -> c1|c2
4147   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4148   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4149   if (N0C && N1C && !N1C->isOpaque())
4150     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4151   // canonicalize constant to RHS
4152   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4153      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4154     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4155   // fold (or x, 0) -> x
4156   if (isNullConstant(N1))
4157     return N0;
4158   // fold (or x, -1) -> -1
4159   if (isAllOnesConstant(N1))
4160     return N1;
4161 
4162   if (SDValue NewSel = foldBinOpIntoSelect(N))
4163     return NewSel;
4164 
4165   // fold (or x, c) -> c iff (x & ~c) == 0
4166   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4167     return N1;
4168 
4169   if (SDValue Combined = visitORLike(N0, N1, N))
4170     return Combined;
4171 
4172   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4173   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4174     return BSwap;
4175   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4176     return BSwap;
4177 
4178   // reassociate or
4179   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4180     return ROR;
4181   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4182   // iff (c1 & c2) != 0.
4183   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4184              isa<ConstantSDNode>(N0.getOperand(1))) {
4185     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
4186     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
4187       if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
4188                                                    N1C, C1))
4189         return DAG.getNode(
4190             ISD::AND, SDLoc(N), VT,
4191             DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
4192       return SDValue();
4193     }
4194   }
4195   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
4196   if (N0.getOpcode() == N1.getOpcode())
4197     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4198       return Tmp;
4199 
4200   // See if this is some rotate idiom.
4201   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4202     return SDValue(Rot, 0);
4203 
4204   if (SDValue Load = MatchLoadCombine(N))
4205     return Load;
4206 
4207   // Simplify the operands using demanded-bits information.
4208   if (!VT.isVector() &&
4209       SimplifyDemandedBits(SDValue(N, 0)))
4210     return SDValue(N, 0);
4211 
4212   return SDValue();
4213 }
4214 
4215 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4216 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4217   if (Op.getOpcode() == ISD::AND) {
4218     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4219       Mask = Op.getOperand(1);
4220       Op = Op.getOperand(0);
4221     } else {
4222       return false;
4223     }
4224   }
4225 
4226   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4227     Shift = Op;
4228     return true;
4229   }
4230 
4231   return false;
4232 }
4233 
4234 // Return true if we can prove that, whenever Neg and Pos are both in the
4235 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
4236 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4237 //
4238 //     (or (shift1 X, Neg), (shift2 X, Pos))
4239 //
4240 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4241 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
4242 // to consider shift amounts with defined behavior.
4243 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4244   // If EltSize is a power of 2 then:
4245   //
4246   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4247   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4248   //
4249   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4250   // for the stronger condition:
4251   //
4252   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
4253   //
4254   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4255   // we can just replace Neg with Neg' for the rest of the function.
4256   //
4257   // In other cases we check for the even stronger condition:
4258   //
4259   //     Neg == EltSize - Pos                                    [B]
4260   //
4261   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
4262   // behavior if Pos == 0 (and consequently Neg == EltSize).
4263   //
4264   // We could actually use [A] whenever EltSize is a power of 2, but the
4265   // only extra cases that it would match are those uninteresting ones
4266   // where Neg and Pos are never in range at the same time.  E.g. for
4267   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4268   // as well as (sub 32, Pos), but:
4269   //
4270   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4271   //
4272   // always invokes undefined behavior for 32-bit X.
4273   //
4274   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4275   unsigned MaskLoBits = 0;
4276   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4277     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4278       if (NegC->getAPIntValue() == EltSize - 1) {
4279         Neg = Neg.getOperand(0);
4280         MaskLoBits = Log2_64(EltSize);
4281       }
4282     }
4283   }
4284 
4285   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4286   if (Neg.getOpcode() != ISD::SUB)
4287     return false;
4288   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4289   if (!NegC)
4290     return false;
4291   SDValue NegOp1 = Neg.getOperand(1);
4292 
4293   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4294   // Pos'.  The truncation is redundant for the purpose of the equality.
4295   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4296     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4297       if (PosC->getAPIntValue() == EltSize - 1)
4298         Pos = Pos.getOperand(0);
4299 
4300   // The condition we need is now:
4301   //
4302   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4303   //
4304   // If NegOp1 == Pos then we need:
4305   //
4306   //              EltSize & Mask == NegC & Mask
4307   //
4308   // (because "x & Mask" is a truncation and distributes through subtraction).
4309   APInt Width;
4310   if (Pos == NegOp1)
4311     Width = NegC->getAPIntValue();
4312 
4313   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4314   // Then the condition we want to prove becomes:
4315   //
4316   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4317   //
4318   // which, again because "x & Mask" is a truncation, becomes:
4319   //
4320   //                NegC & Mask == (EltSize - PosC) & Mask
4321   //             EltSize & Mask == (NegC + PosC) & Mask
4322   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4323     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4324       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4325     else
4326       return false;
4327   } else
4328     return false;
4329 
4330   // Now we just need to check that EltSize & Mask == Width & Mask.
4331   if (MaskLoBits)
4332     // EltSize & Mask is 0 since Mask is EltSize - 1.
4333     return Width.getLoBits(MaskLoBits) == 0;
4334   return Width == EltSize;
4335 }
4336 
4337 // A subroutine of MatchRotate used once we have found an OR of two opposite
4338 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4339 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4340 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4341 // Neg with outer conversions stripped away.
4342 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4343                                        SDValue Neg, SDValue InnerPos,
4344                                        SDValue InnerNeg, unsigned PosOpcode,
4345                                        unsigned NegOpcode, const SDLoc &DL) {
4346   // fold (or (shl x, (*ext y)),
4347   //          (srl x, (*ext (sub 32, y)))) ->
4348   //   (rotl x, y) or (rotr x, (sub 32, y))
4349   //
4350   // fold (or (shl x, (*ext (sub 32, y))),
4351   //          (srl x, (*ext y))) ->
4352   //   (rotr x, y) or (rotl x, (sub 32, y))
4353   EVT VT = Shifted.getValueType();
4354   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4355     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4356     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4357                        HasPos ? Pos : Neg).getNode();
4358   }
4359 
4360   return nullptr;
4361 }
4362 
4363 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
4364 // idioms for rotate, and if the target supports rotation instructions, generate
4365 // a rot[lr].
4366 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4367   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
4368   EVT VT = LHS.getValueType();
4369   if (!TLI.isTypeLegal(VT)) return nullptr;
4370 
4371   // The target must have at least one rotate flavor.
4372   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4373   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4374   if (!HasROTL && !HasROTR) return nullptr;
4375 
4376   // Match "(X shl/srl V1) & V2" where V2 may not be present.
4377   SDValue LHSShift;   // The shift.
4378   SDValue LHSMask;    // AND value if any.
4379   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4380     return nullptr; // Not part of a rotate.
4381 
4382   SDValue RHSShift;   // The shift.
4383   SDValue RHSMask;    // AND value if any.
4384   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4385     return nullptr; // Not part of a rotate.
4386 
4387   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4388     return nullptr;   // Not shifting the same value.
4389 
4390   if (LHSShift.getOpcode() == RHSShift.getOpcode())
4391     return nullptr;   // Shifts must disagree.
4392 
4393   // Canonicalize shl to left side in a shl/srl pair.
4394   if (RHSShift.getOpcode() == ISD::SHL) {
4395     std::swap(LHS, RHS);
4396     std::swap(LHSShift, RHSShift);
4397     std::swap(LHSMask, RHSMask);
4398   }
4399 
4400   unsigned EltSizeInBits = VT.getScalarSizeInBits();
4401   SDValue LHSShiftArg = LHSShift.getOperand(0);
4402   SDValue LHSShiftAmt = LHSShift.getOperand(1);
4403   SDValue RHSShiftArg = RHSShift.getOperand(0);
4404   SDValue RHSShiftAmt = RHSShift.getOperand(1);
4405 
4406   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4407   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4408   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
4409     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
4410     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
4411     if ((LShVal + RShVal) != EltSizeInBits)
4412       return nullptr;
4413 
4414     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4415                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4416 
4417     // If there is an AND of either shifted operand, apply it to the result.
4418     if (LHSMask.getNode() || RHSMask.getNode()) {
4419       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
4420 
4421       if (LHSMask.getNode()) {
4422         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
4423         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4424                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
4425                                        DAG.getConstant(RHSBits, DL, VT)));
4426       }
4427       if (RHSMask.getNode()) {
4428         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
4429         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4430                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
4431                                        DAG.getConstant(LHSBits, DL, VT)));
4432       }
4433 
4434       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4435     }
4436 
4437     return Rot.getNode();
4438   }
4439 
4440   // If there is a mask here, and we have a variable shift, we can't be sure
4441   // that we're masking out the right stuff.
4442   if (LHSMask.getNode() || RHSMask.getNode())
4443     return nullptr;
4444 
4445   // If the shift amount is sign/zext/any-extended just peel it off.
4446   SDValue LExtOp0 = LHSShiftAmt;
4447   SDValue RExtOp0 = RHSShiftAmt;
4448   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4449        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4450        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4451        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4452       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4453        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4454        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4455        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4456     LExtOp0 = LHSShiftAmt.getOperand(0);
4457     RExtOp0 = RHSShiftAmt.getOperand(0);
4458   }
4459 
4460   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4461                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4462   if (TryL)
4463     return TryL;
4464 
4465   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4466                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4467   if (TryR)
4468     return TryR;
4469 
4470   return nullptr;
4471 }
4472 
4473 namespace {
4474 /// Helper struct to parse and store a memory address as base + index + offset.
4475 /// We ignore sign extensions when it is safe to do so.
4476 /// The following two expressions are not equivalent. To differentiate we need
4477 /// to store whether there was a sign extension involved in the index
4478 /// computation.
4479 ///  (load (i64 add (i64 copyfromreg %c)
4480 ///                 (i64 signextend (add (i8 load %index)
4481 ///                                      (i8 1))))
4482 /// vs
4483 ///
4484 /// (load (i64 add (i64 copyfromreg %c)
4485 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
4486 ///                                         (i32 1)))))
4487 struct BaseIndexOffset {
4488   SDValue Base;
4489   SDValue Index;
4490   int64_t Offset;
4491   bool IsIndexSignExt;
4492 
4493   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
4494 
4495   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
4496                   bool IsIndexSignExt) :
4497     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
4498 
4499   bool equalBaseIndex(const BaseIndexOffset &Other) {
4500     return Other.Base == Base && Other.Index == Index &&
4501       Other.IsIndexSignExt == IsIndexSignExt;
4502   }
4503 
4504   /// Parses tree in Ptr for base, index, offset addresses.
4505   static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
4506                                int64_t PartialOffset = 0) {
4507     bool IsIndexSignExt = false;
4508 
4509     // Split up a folded GlobalAddress+Offset into its component parts.
4510     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
4511       if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
4512         return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
4513                                                     SDLoc(GA),
4514                                                     GA->getValueType(0),
4515                                                     /*Offset=*/PartialOffset,
4516                                                     /*isTargetGA=*/false,
4517                                                     GA->getTargetFlags()),
4518                                SDValue(),
4519                                GA->getOffset(),
4520                                IsIndexSignExt);
4521       }
4522 
4523     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
4524     // instruction, then it could be just the BASE or everything else we don't
4525     // know how to handle. Just use Ptr as BASE and give up.
4526     if (Ptr->getOpcode() != ISD::ADD)
4527       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4528 
4529     // We know that we have at least an ADD instruction. Try to pattern match
4530     // the simple case of BASE + OFFSET.
4531     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
4532       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
4533       return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
4534     }
4535 
4536     // Inside a loop the current BASE pointer is calculated using an ADD and a
4537     // MUL instruction. In this case Ptr is the actual BASE pointer.
4538     // (i64 add (i64 %array_ptr)
4539     //          (i64 mul (i64 %induction_var)
4540     //                   (i64 %element_size)))
4541     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
4542       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4543 
4544     // Look at Base + Index + Offset cases.
4545     SDValue Base = Ptr->getOperand(0);
4546     SDValue IndexOffset = Ptr->getOperand(1);
4547 
4548     // Skip signextends.
4549     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
4550       IndexOffset = IndexOffset->getOperand(0);
4551       IsIndexSignExt = true;
4552     }
4553 
4554     // Either the case of Base + Index (no offset) or something else.
4555     if (IndexOffset->getOpcode() != ISD::ADD)
4556       return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
4557 
4558     // Now we have the case of Base + Index + offset.
4559     SDValue Index = IndexOffset->getOperand(0);
4560     SDValue Offset = IndexOffset->getOperand(1);
4561 
4562     if (!isa<ConstantSDNode>(Offset))
4563       return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4564 
4565     // Ignore signextends.
4566     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
4567       Index = Index->getOperand(0);
4568       IsIndexSignExt = true;
4569     } else IsIndexSignExt = false;
4570 
4571     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
4572     return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
4573   }
4574 };
4575 } // namespace
4576 
4577 namespace {
4578 /// Represents known origin of an individual byte in load combine pattern. The
4579 /// value of the byte is either constant zero or comes from memory.
4580 struct ByteProvider {
4581   // For constant zero providers Load is set to nullptr. For memory providers
4582   // Load represents the node which loads the byte from memory.
4583   // ByteOffset is the offset of the byte in the value produced by the load.
4584   LoadSDNode *Load;
4585   unsigned ByteOffset;
4586 
4587   ByteProvider() : Load(nullptr), ByteOffset(0) {}
4588 
4589   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4590     return ByteProvider(Load, ByteOffset);
4591   }
4592   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4593 
4594   bool isConstantZero() const { return !Load; }
4595   bool isMemory() const { return Load; }
4596 
4597   bool operator==(const ByteProvider &Other) const {
4598     return Other.Load == Load && Other.ByteOffset == ByteOffset;
4599   }
4600 
4601 private:
4602   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4603       : Load(Load), ByteOffset(ByteOffset) {}
4604 };
4605 
4606 /// Recursively traverses the expression calculating the origin of the requested
4607 /// byte of the given value. Returns None if the provider can't be calculated.
4608 ///
4609 /// For all the values except the root of the expression verifies that the value
4610 /// has exactly one use and if it's not true return None. This way if the origin
4611 /// of the byte is returned it's guaranteed that the values which contribute to
4612 /// the byte are not used outside of this expression.
4613 ///
4614 /// Because the parts of the expression are not allowed to have more than one
4615 /// use this function iterates over trees, not DAGs. So it never visits the same
4616 /// node more than once.
4617 const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
4618                                                    unsigned Depth,
4619                                                    bool Root = false) {
4620   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
4621   if (Depth == 10)
4622     return None;
4623 
4624   if (!Root && !Op.hasOneUse())
4625     return None;
4626 
4627   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
4628   unsigned BitWidth = Op.getValueSizeInBits();
4629   if (BitWidth % 8 != 0)
4630     return None;
4631   unsigned ByteWidth = BitWidth / 8;
4632   assert(Index < ByteWidth && "invalid index requested");
4633   (void) ByteWidth;
4634 
4635   switch (Op.getOpcode()) {
4636   case ISD::OR: {
4637     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
4638     if (!LHS)
4639       return None;
4640     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
4641     if (!RHS)
4642       return None;
4643 
4644     if (LHS->isConstantZero())
4645       return RHS;
4646     if (RHS->isConstantZero())
4647       return LHS;
4648     return None;
4649   }
4650   case ISD::SHL: {
4651     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
4652     if (!ShiftOp)
4653       return None;
4654 
4655     uint64_t BitShift = ShiftOp->getZExtValue();
4656     if (BitShift % 8 != 0)
4657       return None;
4658     uint64_t ByteShift = BitShift / 8;
4659 
4660     return Index < ByteShift
4661                ? ByteProvider::getConstantZero()
4662                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
4663                                        Depth + 1);
4664   }
4665   case ISD::ANY_EXTEND:
4666   case ISD::SIGN_EXTEND:
4667   case ISD::ZERO_EXTEND: {
4668     SDValue NarrowOp = Op->getOperand(0);
4669     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
4670     if (NarrowBitWidth % 8 != 0)
4671       return None;
4672     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4673 
4674     if (Index >= NarrowByteWidth)
4675       return Op.getOpcode() == ISD::ZERO_EXTEND
4676                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4677                  : None;
4678     return calculateByteProvider(NarrowOp, Index, Depth + 1);
4679   }
4680   case ISD::BSWAP:
4681     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
4682                                  Depth + 1);
4683   case ISD::LOAD: {
4684     auto L = cast<LoadSDNode>(Op.getNode());
4685     if (L->isVolatile() || L->isIndexed())
4686       return None;
4687 
4688     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
4689     if (NarrowBitWidth % 8 != 0)
4690       return None;
4691     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4692 
4693     if (Index >= NarrowByteWidth)
4694       return L->getExtensionType() == ISD::ZEXTLOAD
4695                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4696                  : None;
4697     return ByteProvider::getMemory(L, Index);
4698   }
4699   }
4700 
4701   return None;
4702 }
4703 } // namespace
4704 
4705 /// Match a pattern where a wide type scalar value is loaded by several narrow
4706 /// loads and combined by shifts and ors. Fold it into a single load or a load
4707 /// and a BSWAP if the targets supports it.
4708 ///
4709 /// Assuming little endian target:
4710 ///  i8 *a = ...
4711 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4712 /// =>
4713 ///  i32 val = *((i32)a)
4714 ///
4715 ///  i8 *a = ...
4716 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4717 /// =>
4718 ///  i32 val = BSWAP(*((i32)a))
4719 ///
4720 /// TODO: This rule matches complex patterns with OR node roots and doesn't
4721 /// interact well with the worklist mechanism. When a part of the pattern is
4722 /// updated (e.g. one of the loads) its direct users are put into the worklist,
4723 /// but the root node of the pattern which triggers the load combine is not
4724 /// necessarily a direct user of the changed node. For example, once the address
4725 /// of t28 load is reassociated load combine won't be triggered:
4726 ///             t25: i32 = add t4, Constant:i32<2>
4727 ///           t26: i64 = sign_extend t25
4728 ///        t27: i64 = add t2, t26
4729 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
4730 ///     t29: i32 = zero_extend t28
4731 ///   t32: i32 = shl t29, Constant:i8<8>
4732 /// t33: i32 = or t23, t32
4733 /// As a possible fix visitLoad can check if the load can be a part of a load
4734 /// combine pattern and add corresponding OR roots to the worklist.
4735 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
4736   assert(N->getOpcode() == ISD::OR &&
4737          "Can only match load combining against OR nodes");
4738 
4739   // Handles simple types only
4740   EVT VT = N->getValueType(0);
4741   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
4742     return SDValue();
4743   unsigned ByteWidth = VT.getSizeInBits() / 8;
4744 
4745   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4746   // Before legalize we can introduce too wide illegal loads which will be later
4747   // split into legal sized loads. This enables us to combine i64 load by i8
4748   // patterns to a couple of i32 loads on 32 bit targets.
4749   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
4750     return SDValue();
4751 
4752   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
4753     unsigned BW, unsigned i) { return i; };
4754   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
4755     unsigned BW, unsigned i) { return BW - i - 1; };
4756 
4757   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
4758   auto MemoryByteOffset = [&] (ByteProvider P) {
4759     assert(P.isMemory() && "Must be a memory byte provider");
4760     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
4761     assert(LoadBitWidth % 8 == 0 &&
4762            "can only analyze providers for individual bytes not bit");
4763     unsigned LoadByteWidth = LoadBitWidth / 8;
4764     return IsBigEndianTarget
4765             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
4766             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
4767   };
4768 
4769   Optional<BaseIndexOffset> Base;
4770   SDValue Chain;
4771 
4772   SmallSet<LoadSDNode *, 8> Loads;
4773   Optional<ByteProvider> FirstByteProvider;
4774   int64_t FirstOffset = INT64_MAX;
4775 
4776   // Check if all the bytes of the OR we are looking at are loaded from the same
4777   // base address. Collect bytes offsets from Base address in ByteOffsets.
4778   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
4779   for (unsigned i = 0; i < ByteWidth; i++) {
4780     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
4781     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
4782       return SDValue();
4783 
4784     LoadSDNode *L = P->Load;
4785     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
4786            "Must be enforced by calculateByteProvider");
4787     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
4788 
4789     // All loads must share the same chain
4790     SDValue LChain = L->getChain();
4791     if (!Chain)
4792       Chain = LChain;
4793     else if (Chain != LChain)
4794       return SDValue();
4795 
4796     // Loads must share the same base address
4797     BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
4798     if (!Base)
4799       Base = Ptr;
4800     else if (!Base->equalBaseIndex(Ptr))
4801       return SDValue();
4802 
4803     // Calculate the offset of the current byte from the base address
4804     int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
4805     ByteOffsets[i] = ByteOffsetFromBase;
4806 
4807     // Remember the first byte load
4808     if (ByteOffsetFromBase < FirstOffset) {
4809       FirstByteProvider = P;
4810       FirstOffset = ByteOffsetFromBase;
4811     }
4812 
4813     Loads.insert(L);
4814   }
4815   assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
4816          "memory, so there must be at least one load which produces the value");
4817   assert(Base && "Base address of the accessed memory location must be set");
4818   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
4819 
4820   // Check if the bytes of the OR we are looking at match with either big or
4821   // little endian value load
4822   bool BigEndian = true, LittleEndian = true;
4823   for (unsigned i = 0; i < ByteWidth; i++) {
4824     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
4825     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
4826     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
4827     if (!BigEndian && !LittleEndian)
4828       return SDValue();
4829   }
4830   assert((BigEndian != LittleEndian) && "should be either or");
4831   assert(FirstByteProvider && "must be set");
4832 
4833   // Ensure that the first byte is loaded from zero offset of the first load.
4834   // So the combined value can be loaded from the first load address.
4835   if (MemoryByteOffset(*FirstByteProvider) != 0)
4836     return SDValue();
4837   LoadSDNode *FirstLoad = FirstByteProvider->Load;
4838 
4839   // The node we are looking at matches with the pattern, check if we can
4840   // replace it with a single load and bswap if needed.
4841 
4842   // If the load needs byte swap check if the target supports it
4843   bool NeedsBswap = IsBigEndianTarget != BigEndian;
4844 
4845   // Before legalize we can introduce illegal bswaps which will be later
4846   // converted to an explicit bswap sequence. This way we end up with a single
4847   // load and byte shuffling instead of several loads and byte shuffling.
4848   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
4849     return SDValue();
4850 
4851   // Check that a load of the wide type is both allowed and fast on the target
4852   bool Fast = false;
4853   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
4854                                         VT, FirstLoad->getAddressSpace(),
4855                                         FirstLoad->getAlignment(), &Fast);
4856   if (!Allowed || !Fast)
4857     return SDValue();
4858 
4859   SDValue NewLoad =
4860       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
4861                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
4862 
4863   // Transfer chain users from old loads to the new load.
4864   for (LoadSDNode *L : Loads)
4865     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
4866 
4867   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
4868 }
4869 
4870 SDValue DAGCombiner::visitXOR(SDNode *N) {
4871   SDValue N0 = N->getOperand(0);
4872   SDValue N1 = N->getOperand(1);
4873   EVT VT = N0.getValueType();
4874 
4875   // fold vector ops
4876   if (VT.isVector()) {
4877     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4878       return FoldedVOp;
4879 
4880     // fold (xor x, 0) -> x, vector edition
4881     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4882       return N1;
4883     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4884       return N0;
4885   }
4886 
4887   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
4888   if (N0.isUndef() && N1.isUndef())
4889     return DAG.getConstant(0, SDLoc(N), VT);
4890   // fold (xor x, undef) -> undef
4891   if (N0.isUndef())
4892     return N0;
4893   if (N1.isUndef())
4894     return N1;
4895   // fold (xor c1, c2) -> c1^c2
4896   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4897   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4898   if (N0C && N1C)
4899     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
4900   // canonicalize constant to RHS
4901   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4902      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4903     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
4904   // fold (xor x, 0) -> x
4905   if (isNullConstant(N1))
4906     return N0;
4907 
4908   if (SDValue NewSel = foldBinOpIntoSelect(N))
4909     return NewSel;
4910 
4911   // reassociate xor
4912   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
4913     return RXOR;
4914 
4915   // fold !(x cc y) -> (x !cc y)
4916   SDValue LHS, RHS, CC;
4917   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
4918     bool isInt = LHS.getValueType().isInteger();
4919     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
4920                                                isInt);
4921 
4922     if (!LegalOperations ||
4923         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
4924       switch (N0.getOpcode()) {
4925       default:
4926         llvm_unreachable("Unhandled SetCC Equivalent!");
4927       case ISD::SETCC:
4928         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
4929       case ISD::SELECT_CC:
4930         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
4931                                N0.getOperand(3), NotCC);
4932       }
4933     }
4934   }
4935 
4936   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
4937   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
4938       N0.getNode()->hasOneUse() &&
4939       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
4940     SDValue V = N0.getOperand(0);
4941     SDLoc DL(N0);
4942     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
4943                     DAG.getConstant(1, DL, V.getValueType()));
4944     AddToWorklist(V.getNode());
4945     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
4946   }
4947 
4948   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
4949   if (isOneConstant(N1) && VT == MVT::i1 &&
4950       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4951     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4952     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
4953       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4954       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4955       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4956       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4957       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4958     }
4959   }
4960   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
4961   if (isAllOnesConstant(N1) &&
4962       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4963     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4964     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
4965       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4966       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4967       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4968       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4969       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4970     }
4971   }
4972   // fold (xor (and x, y), y) -> (and (not x), y)
4973   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4974       N0->getOperand(1) == N1) {
4975     SDValue X = N0->getOperand(0);
4976     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
4977     AddToWorklist(NotX.getNode());
4978     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
4979   }
4980   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
4981   if (N1C && N0.getOpcode() == ISD::XOR) {
4982     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
4983       SDLoc DL(N);
4984       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4985                          DAG.getConstant(N1C->getAPIntValue() ^
4986                                          N00C->getAPIntValue(), DL, VT));
4987     }
4988     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
4989       SDLoc DL(N);
4990       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
4991                          DAG.getConstant(N1C->getAPIntValue() ^
4992                                          N01C->getAPIntValue(), DL, VT));
4993     }
4994   }
4995 
4996   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
4997   unsigned OpSizeInBits = VT.getScalarSizeInBits();
4998   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
4999       N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5000       TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5001     if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5002       if (C->getAPIntValue() == (OpSizeInBits - 1))
5003         return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5004   }
5005 
5006   // fold (xor x, x) -> 0
5007   if (N0 == N1)
5008     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
5009 
5010   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
5011   // Here is a concrete example of this equivalence:
5012   // i16   x ==  14
5013   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
5014   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
5015   //
5016   // =>
5017   //
5018   // i16     ~1      == 0b1111111111111110
5019   // i16 rol(~1, 14) == 0b1011111111111111
5020   //
5021   // Some additional tips to help conceptualize this transform:
5022   // - Try to see the operation as placing a single zero in a value of all ones.
5023   // - There exists no value for x which would allow the result to contain zero.
5024   // - Values of x larger than the bitwidth are undefined and do not require a
5025   //   consistent result.
5026   // - Pushing the zero left requires shifting one bits in from the right.
5027   // A rotate left of ~1 is a nice way of achieving the desired result.
5028   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
5029       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
5030     SDLoc DL(N);
5031     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
5032                        N0.getOperand(1));
5033   }
5034 
5035   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
5036   if (N0.getOpcode() == N1.getOpcode())
5037     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5038       return Tmp;
5039 
5040   // Simplify the expression using non-local knowledge.
5041   if (!VT.isVector() &&
5042       SimplifyDemandedBits(SDValue(N, 0)))
5043     return SDValue(N, 0);
5044 
5045   return SDValue();
5046 }
5047 
5048 /// Handle transforms common to the three shifts, when the shift amount is a
5049 /// constant.
5050 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
5051   SDNode *LHS = N->getOperand(0).getNode();
5052   if (!LHS->hasOneUse()) return SDValue();
5053 
5054   // We want to pull some binops through shifts, so that we have (and (shift))
5055   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
5056   // thing happens with address calculations, so it's important to canonicalize
5057   // it.
5058   bool HighBitSet = false;  // Can we transform this if the high bit is set?
5059 
5060   switch (LHS->getOpcode()) {
5061   default: return SDValue();
5062   case ISD::OR:
5063   case ISD::XOR:
5064     HighBitSet = false; // We can only transform sra if the high bit is clear.
5065     break;
5066   case ISD::AND:
5067     HighBitSet = true;  // We can only transform sra if the high bit is set.
5068     break;
5069   case ISD::ADD:
5070     if (N->getOpcode() != ISD::SHL)
5071       return SDValue(); // only shl(add) not sr[al](add).
5072     HighBitSet = false; // We can only transform sra if the high bit is clear.
5073     break;
5074   }
5075 
5076   // We require the RHS of the binop to be a constant and not opaque as well.
5077   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
5078   if (!BinOpCst) return SDValue();
5079 
5080   // FIXME: disable this unless the input to the binop is a shift by a constant
5081   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
5082   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
5083   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
5084                  BinOpLHSVal->getOpcode() == ISD::SRA ||
5085                  BinOpLHSVal->getOpcode() == ISD::SRL;
5086   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
5087                         BinOpLHSVal->getOpcode() == ISD::SELECT;
5088 
5089   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
5090       !isCopyOrSelect)
5091     return SDValue();
5092 
5093   if (isCopyOrSelect && N->hasOneUse())
5094     return SDValue();
5095 
5096   EVT VT = N->getValueType(0);
5097 
5098   // If this is a signed shift right, and the high bit is modified by the
5099   // logical operation, do not perform the transformation. The highBitSet
5100   // boolean indicates the value of the high bit of the constant which would
5101   // cause it to be modified for this operation.
5102   if (N->getOpcode() == ISD::SRA) {
5103     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
5104     if (BinOpRHSSignSet != HighBitSet)
5105       return SDValue();
5106   }
5107 
5108   if (!TLI.isDesirableToCommuteWithShift(LHS))
5109     return SDValue();
5110 
5111   // Fold the constants, shifting the binop RHS by the shift amount.
5112   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
5113                                N->getValueType(0),
5114                                LHS->getOperand(1), N->getOperand(1));
5115   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
5116 
5117   // Create the new shift.
5118   SDValue NewShift = DAG.getNode(N->getOpcode(),
5119                                  SDLoc(LHS->getOperand(0)),
5120                                  VT, LHS->getOperand(0), N->getOperand(1));
5121 
5122   // Create the new binop.
5123   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
5124 }
5125 
5126 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5127   assert(N->getOpcode() == ISD::TRUNCATE);
5128   assert(N->getOperand(0).getOpcode() == ISD::AND);
5129 
5130   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5131   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5132     SDValue N01 = N->getOperand(0).getOperand(1);
5133     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5134       SDLoc DL(N);
5135       EVT TruncVT = N->getValueType(0);
5136       SDValue N00 = N->getOperand(0).getOperand(0);
5137       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5138       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5139       AddToWorklist(Trunc00.getNode());
5140       AddToWorklist(Trunc01.getNode());
5141       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5142     }
5143   }
5144 
5145   return SDValue();
5146 }
5147 
5148 SDValue DAGCombiner::visitRotate(SDNode *N) {
5149   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
5150   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
5151       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
5152     if (SDValue NewOp1 =
5153             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
5154       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
5155                          N->getOperand(0), NewOp1);
5156   }
5157   return SDValue();
5158 }
5159 
5160 SDValue DAGCombiner::visitSHL(SDNode *N) {
5161   SDValue N0 = N->getOperand(0);
5162   SDValue N1 = N->getOperand(1);
5163   EVT VT = N0.getValueType();
5164   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5165 
5166   // fold vector ops
5167   if (VT.isVector()) {
5168     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5169       return FoldedVOp;
5170 
5171     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
5172     // If setcc produces all-one true value then:
5173     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
5174     if (N1CV && N1CV->isConstant()) {
5175       if (N0.getOpcode() == ISD::AND) {
5176         SDValue N00 = N0->getOperand(0);
5177         SDValue N01 = N0->getOperand(1);
5178         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
5179 
5180         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
5181             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
5182                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
5183           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
5184                                                      N01CV, N1CV))
5185             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
5186         }
5187       }
5188     }
5189   }
5190 
5191   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5192 
5193   // fold (shl c1, c2) -> c1<<c2
5194   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5195   if (N0C && N1C && !N1C->isOpaque())
5196     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
5197   // fold (shl 0, x) -> 0
5198   if (isNullConstant(N0))
5199     return N0;
5200   // fold (shl x, c >= size(x)) -> undef
5201   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5202     return DAG.getUNDEF(VT);
5203   // fold (shl x, 0) -> x
5204   if (N1C && N1C->isNullValue())
5205     return N0;
5206   // fold (shl undef, x) -> 0
5207   if (N0.isUndef())
5208     return DAG.getConstant(0, SDLoc(N), VT);
5209 
5210   if (SDValue NewSel = foldBinOpIntoSelect(N))
5211     return NewSel;
5212 
5213   // if (shl x, c) is known to be zero, return 0
5214   if (DAG.MaskedValueIsZero(SDValue(N, 0),
5215                             APInt::getAllOnesValue(OpSizeInBits)))
5216     return DAG.getConstant(0, SDLoc(N), VT);
5217   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
5218   if (N1.getOpcode() == ISD::TRUNCATE &&
5219       N1.getOperand(0).getOpcode() == ISD::AND) {
5220     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5221       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
5222   }
5223 
5224   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5225     return SDValue(N, 0);
5226 
5227   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
5228   if (N1C && N0.getOpcode() == ISD::SHL) {
5229     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5230       SDLoc DL(N);
5231       APInt c1 = N0C1->getAPIntValue();
5232       APInt c2 = N1C->getAPIntValue();
5233       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5234 
5235       APInt Sum = c1 + c2;
5236       if (Sum.uge(OpSizeInBits))
5237         return DAG.getConstant(0, DL, VT);
5238 
5239       return DAG.getNode(
5240           ISD::SHL, DL, VT, N0.getOperand(0),
5241           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5242     }
5243   }
5244 
5245   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
5246   // For this to be valid, the second form must not preserve any of the bits
5247   // that are shifted out by the inner shift in the first form.  This means
5248   // the outer shift size must be >= the number of bits added by the ext.
5249   // As a corollary, we don't care what kind of ext it is.
5250   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
5251               N0.getOpcode() == ISD::ANY_EXTEND ||
5252               N0.getOpcode() == ISD::SIGN_EXTEND) &&
5253       N0.getOperand(0).getOpcode() == ISD::SHL) {
5254     SDValue N0Op0 = N0.getOperand(0);
5255     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5256       APInt c1 = N0Op0C1->getAPIntValue();
5257       APInt c2 = N1C->getAPIntValue();
5258       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5259 
5260       EVT InnerShiftVT = N0Op0.getValueType();
5261       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5262       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
5263         SDLoc DL(N0);
5264         APInt Sum = c1 + c2;
5265         if (Sum.uge(OpSizeInBits))
5266           return DAG.getConstant(0, DL, VT);
5267 
5268         return DAG.getNode(
5269             ISD::SHL, DL, VT,
5270             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
5271             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5272       }
5273     }
5274   }
5275 
5276   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
5277   // Only fold this if the inner zext has no other uses to avoid increasing
5278   // the total number of instructions.
5279   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
5280       N0.getOperand(0).getOpcode() == ISD::SRL) {
5281     SDValue N0Op0 = N0.getOperand(0);
5282     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
5283       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
5284         uint64_t c1 = N0Op0C1->getZExtValue();
5285         uint64_t c2 = N1C->getZExtValue();
5286         if (c1 == c2) {
5287           SDValue NewOp0 = N0.getOperand(0);
5288           EVT CountVT = NewOp0.getOperand(1).getValueType();
5289           SDLoc DL(N);
5290           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
5291                                        NewOp0,
5292                                        DAG.getConstant(c2, DL, CountVT));
5293           AddToWorklist(NewSHL.getNode());
5294           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
5295         }
5296       }
5297     }
5298   }
5299 
5300   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
5301   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
5302   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
5303       cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
5304     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5305       uint64_t C1 = N0C1->getZExtValue();
5306       uint64_t C2 = N1C->getZExtValue();
5307       SDLoc DL(N);
5308       if (C1 <= C2)
5309         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5310                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
5311       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
5312                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
5313     }
5314   }
5315 
5316   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
5317   //                               (and (srl x, (sub c1, c2), MASK)
5318   // Only fold this if the inner shift has no other uses -- if it does, folding
5319   // this will increase the total number of instructions.
5320   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5321     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5322       uint64_t c1 = N0C1->getZExtValue();
5323       if (c1 < OpSizeInBits) {
5324         uint64_t c2 = N1C->getZExtValue();
5325         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
5326         SDValue Shift;
5327         if (c2 > c1) {
5328           Mask = Mask.shl(c2 - c1);
5329           SDLoc DL(N);
5330           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
5331                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
5332         } else {
5333           Mask = Mask.lshr(c1 - c2);
5334           SDLoc DL(N);
5335           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
5336                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
5337         }
5338         SDLoc DL(N0);
5339         return DAG.getNode(ISD::AND, DL, VT, Shift,
5340                            DAG.getConstant(Mask, DL, VT));
5341       }
5342     }
5343   }
5344 
5345   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
5346   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
5347       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
5348     SDLoc DL(N);
5349     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
5350     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
5351     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
5352   }
5353 
5354   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
5355   // Variant of version done on multiply, except mul by a power of 2 is turned
5356   // into a shift.
5357   if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
5358       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5359       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5360     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
5361     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5362     AddToWorklist(Shl0.getNode());
5363     AddToWorklist(Shl1.getNode());
5364     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
5365   }
5366 
5367   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
5368   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
5369       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
5370       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
5371     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
5372     if (isConstantOrConstantVector(Shl))
5373       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
5374   }
5375 
5376   if (N1C && !N1C->isOpaque())
5377     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
5378       return NewSHL;
5379 
5380   return SDValue();
5381 }
5382 
5383 SDValue DAGCombiner::visitSRA(SDNode *N) {
5384   SDValue N0 = N->getOperand(0);
5385   SDValue N1 = N->getOperand(1);
5386   EVT VT = N0.getValueType();
5387   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5388 
5389   // Arithmetic shifting an all-sign-bit value is a no-op.
5390   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5391     return N0;
5392 
5393   // fold vector ops
5394   if (VT.isVector())
5395     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5396       return FoldedVOp;
5397 
5398   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5399 
5400   // fold (sra c1, c2) -> (sra c1, c2)
5401   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5402   if (N0C && N1C && !N1C->isOpaque())
5403     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5404   // fold (sra 0, x) -> 0
5405   if (isNullConstant(N0))
5406     return N0;
5407   // fold (sra -1, x) -> -1
5408   if (isAllOnesConstant(N0))
5409     return N0;
5410   // fold (sra x, c >= size(x)) -> undef
5411   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5412     return DAG.getUNDEF(VT);
5413   // fold (sra x, 0) -> x
5414   if (N1C && N1C->isNullValue())
5415     return N0;
5416 
5417   if (SDValue NewSel = foldBinOpIntoSelect(N))
5418     return NewSel;
5419 
5420   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5421   // sext_inreg.
5422   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5423     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5424     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5425     if (VT.isVector())
5426       ExtVT = EVT::getVectorVT(*DAG.getContext(),
5427                                ExtVT, VT.getVectorNumElements());
5428     if ((!LegalOperations ||
5429          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5430       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5431                          N0.getOperand(0), DAG.getValueType(ExtVT));
5432   }
5433 
5434   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5435   if (N1C && N0.getOpcode() == ISD::SRA) {
5436     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5437       SDLoc DL(N);
5438       APInt c1 = N0C1->getAPIntValue();
5439       APInt c2 = N1C->getAPIntValue();
5440       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5441 
5442       APInt Sum = c1 + c2;
5443       if (Sum.uge(OpSizeInBits))
5444         Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
5445 
5446       return DAG.getNode(
5447           ISD::SRA, DL, VT, N0.getOperand(0),
5448           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5449     }
5450   }
5451 
5452   // fold (sra (shl X, m), (sub result_size, n))
5453   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5454   // result_size - n != m.
5455   // If truncate is free for the target sext(shl) is likely to result in better
5456   // code.
5457   if (N0.getOpcode() == ISD::SHL && N1C) {
5458     // Get the two constanst of the shifts, CN0 = m, CN = n.
5459     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5460     if (N01C) {
5461       LLVMContext &Ctx = *DAG.getContext();
5462       // Determine what the truncate's result bitsize and type would be.
5463       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5464 
5465       if (VT.isVector())
5466         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5467 
5468       // Determine the residual right-shift amount.
5469       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5470 
5471       // If the shift is not a no-op (in which case this should be just a sign
5472       // extend already), the truncated to type is legal, sign_extend is legal
5473       // on that type, and the truncate to that type is both legal and free,
5474       // perform the transform.
5475       if ((ShiftAmt > 0) &&
5476           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5477           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5478           TLI.isTruncateFree(VT, TruncVT)) {
5479 
5480         SDLoc DL(N);
5481         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5482             getShiftAmountTy(N0.getOperand(0).getValueType()));
5483         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5484                                     N0.getOperand(0), Amt);
5485         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5486                                     Shift);
5487         return DAG.getNode(ISD::SIGN_EXTEND, DL,
5488                            N->getValueType(0), Trunc);
5489       }
5490     }
5491   }
5492 
5493   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5494   if (N1.getOpcode() == ISD::TRUNCATE &&
5495       N1.getOperand(0).getOpcode() == ISD::AND) {
5496     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5497       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5498   }
5499 
5500   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5501   //      if c1 is equal to the number of bits the trunc removes
5502   if (N0.getOpcode() == ISD::TRUNCATE &&
5503       (N0.getOperand(0).getOpcode() == ISD::SRL ||
5504        N0.getOperand(0).getOpcode() == ISD::SRA) &&
5505       N0.getOperand(0).hasOneUse() &&
5506       N0.getOperand(0).getOperand(1).hasOneUse() &&
5507       N1C) {
5508     SDValue N0Op0 = N0.getOperand(0);
5509     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5510       unsigned LargeShiftVal = LargeShift->getZExtValue();
5511       EVT LargeVT = N0Op0.getValueType();
5512 
5513       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5514         SDLoc DL(N);
5515         SDValue Amt =
5516           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5517                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5518         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5519                                   N0Op0.getOperand(0), Amt);
5520         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5521       }
5522     }
5523   }
5524 
5525   // Simplify, based on bits shifted out of the LHS.
5526   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5527     return SDValue(N, 0);
5528 
5529 
5530   // If the sign bit is known to be zero, switch this to a SRL.
5531   if (DAG.SignBitIsZero(N0))
5532     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
5533 
5534   if (N1C && !N1C->isOpaque())
5535     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
5536       return NewSRA;
5537 
5538   return SDValue();
5539 }
5540 
5541 SDValue DAGCombiner::visitSRL(SDNode *N) {
5542   SDValue N0 = N->getOperand(0);
5543   SDValue N1 = N->getOperand(1);
5544   EVT VT = N0.getValueType();
5545   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5546 
5547   // fold vector ops
5548   if (VT.isVector())
5549     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5550       return FoldedVOp;
5551 
5552   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5553 
5554   // fold (srl c1, c2) -> c1 >>u c2
5555   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5556   if (N0C && N1C && !N1C->isOpaque())
5557     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5558   // fold (srl 0, x) -> 0
5559   if (isNullConstant(N0))
5560     return N0;
5561   // fold (srl x, c >= size(x)) -> undef
5562   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5563     return DAG.getUNDEF(VT);
5564   // fold (srl x, 0) -> x
5565   if (N1C && N1C->isNullValue())
5566     return N0;
5567 
5568   if (SDValue NewSel = foldBinOpIntoSelect(N))
5569     return NewSel;
5570 
5571   // if (srl x, c) is known to be zero, return 0
5572   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5573                                    APInt::getAllOnesValue(OpSizeInBits)))
5574     return DAG.getConstant(0, SDLoc(N), VT);
5575 
5576   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5577   if (N1C && N0.getOpcode() == ISD::SRL) {
5578     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5579       SDLoc DL(N);
5580       APInt c1 = N0C1->getAPIntValue();
5581       APInt c2 = N1C->getAPIntValue();
5582       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5583 
5584       APInt Sum = c1 + c2;
5585       if (Sum.uge(OpSizeInBits))
5586         return DAG.getConstant(0, DL, VT);
5587 
5588       return DAG.getNode(
5589           ISD::SRL, DL, VT, N0.getOperand(0),
5590           DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5591     }
5592   }
5593 
5594   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5595   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5596       N0.getOperand(0).getOpcode() == ISD::SRL &&
5597       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
5598     uint64_t c1 =
5599       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
5600     uint64_t c2 = N1C->getZExtValue();
5601     EVT InnerShiftVT = N0.getOperand(0).getValueType();
5602     EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
5603     uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5604     // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5605     if (c1 + OpSizeInBits == InnerShiftSize) {
5606       SDLoc DL(N0);
5607       if (c1 + c2 >= InnerShiftSize)
5608         return DAG.getConstant(0, DL, VT);
5609       return DAG.getNode(ISD::TRUNCATE, DL, VT,
5610                          DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5611                                      N0.getOperand(0)->getOperand(0),
5612                                      DAG.getConstant(c1 + c2, DL,
5613                                                      ShiftCountVT)));
5614     }
5615   }
5616 
5617   // fold (srl (shl x, c), c) -> (and x, cst2)
5618   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5619       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5620     SDLoc DL(N);
5621     SDValue Mask =
5622         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
5623     AddToWorklist(Mask.getNode());
5624     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5625   }
5626 
5627   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5628   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5629     // Shifting in all undef bits?
5630     EVT SmallVT = N0.getOperand(0).getValueType();
5631     unsigned BitSize = SmallVT.getScalarSizeInBits();
5632     if (N1C->getZExtValue() >= BitSize)
5633       return DAG.getUNDEF(VT);
5634 
5635     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5636       uint64_t ShiftAmt = N1C->getZExtValue();
5637       SDLoc DL0(N0);
5638       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5639                                        N0.getOperand(0),
5640                           DAG.getConstant(ShiftAmt, DL0,
5641                                           getShiftAmountTy(SmallVT)));
5642       AddToWorklist(SmallShift.getNode());
5643       APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
5644       SDLoc DL(N);
5645       return DAG.getNode(ISD::AND, DL, VT,
5646                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5647                          DAG.getConstant(Mask, DL, VT));
5648     }
5649   }
5650 
5651   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
5652   // bit, which is unmodified by sra.
5653   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5654     if (N0.getOpcode() == ISD::SRA)
5655       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5656   }
5657 
5658   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
5659   if (N1C && N0.getOpcode() == ISD::CTLZ &&
5660       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5661     APInt KnownZero, KnownOne;
5662     DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
5663 
5664     // If any of the input bits are KnownOne, then the input couldn't be all
5665     // zeros, thus the result of the srl will always be zero.
5666     if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5667 
5668     // If all of the bits input the to ctlz node are known to be zero, then
5669     // the result of the ctlz is "32" and the result of the shift is one.
5670     APInt UnknownBits = ~KnownZero;
5671     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5672 
5673     // Otherwise, check to see if there is exactly one bit input to the ctlz.
5674     if ((UnknownBits & (UnknownBits - 1)) == 0) {
5675       // Okay, we know that only that the single bit specified by UnknownBits
5676       // could be set on input to the CTLZ node. If this bit is set, the SRL
5677       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5678       // to an SRL/XOR pair, which is likely to simplify more.
5679       unsigned ShAmt = UnknownBits.countTrailingZeros();
5680       SDValue Op = N0.getOperand(0);
5681 
5682       if (ShAmt) {
5683         SDLoc DL(N0);
5684         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5685                   DAG.getConstant(ShAmt, DL,
5686                                   getShiftAmountTy(Op.getValueType())));
5687         AddToWorklist(Op.getNode());
5688       }
5689 
5690       SDLoc DL(N);
5691       return DAG.getNode(ISD::XOR, DL, VT,
5692                          Op, DAG.getConstant(1, DL, VT));
5693     }
5694   }
5695 
5696   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5697   if (N1.getOpcode() == ISD::TRUNCATE &&
5698       N1.getOperand(0).getOpcode() == ISD::AND) {
5699     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5700       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5701   }
5702 
5703   // fold operands of srl based on knowledge that the low bits are not
5704   // demanded.
5705   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5706     return SDValue(N, 0);
5707 
5708   if (N1C && !N1C->isOpaque())
5709     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5710       return NewSRL;
5711 
5712   // Attempt to convert a srl of a load into a narrower zero-extending load.
5713   if (SDValue NarrowLoad = ReduceLoadWidth(N))
5714     return NarrowLoad;
5715 
5716   // Here is a common situation. We want to optimize:
5717   //
5718   //   %a = ...
5719   //   %b = and i32 %a, 2
5720   //   %c = srl i32 %b, 1
5721   //   brcond i32 %c ...
5722   //
5723   // into
5724   //
5725   //   %a = ...
5726   //   %b = and %a, 2
5727   //   %c = setcc eq %b, 0
5728   //   brcond %c ...
5729   //
5730   // However when after the source operand of SRL is optimized into AND, the SRL
5731   // itself may not be optimized further. Look for it and add the BRCOND into
5732   // the worklist.
5733   if (N->hasOneUse()) {
5734     SDNode *Use = *N->use_begin();
5735     if (Use->getOpcode() == ISD::BRCOND)
5736       AddToWorklist(Use);
5737     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5738       // Also look pass the truncate.
5739       Use = *Use->use_begin();
5740       if (Use->getOpcode() == ISD::BRCOND)
5741         AddToWorklist(Use);
5742     }
5743   }
5744 
5745   return SDValue();
5746 }
5747 
5748 SDValue DAGCombiner::visitABS(SDNode *N) {
5749   SDValue N0 = N->getOperand(0);
5750   EVT VT = N->getValueType(0);
5751 
5752   // fold (abs c1) -> c2
5753   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5754     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
5755   // fold (abs (abs x)) -> (abs x)
5756   if (N0.getOpcode() == ISD::ABS)
5757     return N0;
5758   // fold (abs x) -> x iff not-negative
5759   if (DAG.SignBitIsZero(N0))
5760     return N0;
5761   return SDValue();
5762 }
5763 
5764 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5765   SDValue N0 = N->getOperand(0);
5766   EVT VT = N->getValueType(0);
5767 
5768   // fold (bswap c1) -> c2
5769   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5770     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5771   // fold (bswap (bswap x)) -> x
5772   if (N0.getOpcode() == ISD::BSWAP)
5773     return N0->getOperand(0);
5774   return SDValue();
5775 }
5776 
5777 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5778   SDValue N0 = N->getOperand(0);
5779   EVT VT = N->getValueType(0);
5780 
5781   // fold (bitreverse c1) -> c2
5782   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5783     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
5784   // fold (bitreverse (bitreverse x)) -> x
5785   if (N0.getOpcode() == ISD::BITREVERSE)
5786     return N0.getOperand(0);
5787   return SDValue();
5788 }
5789 
5790 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5791   SDValue N0 = N->getOperand(0);
5792   EVT VT = N->getValueType(0);
5793 
5794   // fold (ctlz c1) -> c2
5795   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5796     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5797   return SDValue();
5798 }
5799 
5800 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5801   SDValue N0 = N->getOperand(0);
5802   EVT VT = N->getValueType(0);
5803 
5804   // fold (ctlz_zero_undef c1) -> c2
5805   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5806     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5807   return SDValue();
5808 }
5809 
5810 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5811   SDValue N0 = N->getOperand(0);
5812   EVT VT = N->getValueType(0);
5813 
5814   // fold (cttz c1) -> c2
5815   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5816     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5817   return SDValue();
5818 }
5819 
5820 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5821   SDValue N0 = N->getOperand(0);
5822   EVT VT = N->getValueType(0);
5823 
5824   // fold (cttz_zero_undef c1) -> c2
5825   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5826     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5827   return SDValue();
5828 }
5829 
5830 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5831   SDValue N0 = N->getOperand(0);
5832   EVT VT = N->getValueType(0);
5833 
5834   // fold (ctpop c1) -> c2
5835   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5836     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5837   return SDValue();
5838 }
5839 
5840 
5841 /// \brief Generate Min/Max node
5842 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5843                                    SDValue RHS, SDValue True, SDValue False,
5844                                    ISD::CondCode CC, const TargetLowering &TLI,
5845                                    SelectionDAG &DAG) {
5846   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5847     return SDValue();
5848 
5849   switch (CC) {
5850   case ISD::SETOLT:
5851   case ISD::SETOLE:
5852   case ISD::SETLT:
5853   case ISD::SETLE:
5854   case ISD::SETULT:
5855   case ISD::SETULE: {
5856     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
5857     if (TLI.isOperationLegal(Opcode, VT))
5858       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5859     return SDValue();
5860   }
5861   case ISD::SETOGT:
5862   case ISD::SETOGE:
5863   case ISD::SETGT:
5864   case ISD::SETGE:
5865   case ISD::SETUGT:
5866   case ISD::SETUGE: {
5867     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5868     if (TLI.isOperationLegal(Opcode, VT))
5869       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5870     return SDValue();
5871   }
5872   default:
5873     return SDValue();
5874   }
5875 }
5876 
5877 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
5878   SDValue Cond = N->getOperand(0);
5879   SDValue N1 = N->getOperand(1);
5880   SDValue N2 = N->getOperand(2);
5881   EVT VT = N->getValueType(0);
5882   EVT CondVT = Cond.getValueType();
5883   SDLoc DL(N);
5884 
5885   if (!VT.isInteger())
5886     return SDValue();
5887 
5888   auto *C1 = dyn_cast<ConstantSDNode>(N1);
5889   auto *C2 = dyn_cast<ConstantSDNode>(N2);
5890   if (!C1 || !C2)
5891     return SDValue();
5892 
5893   // Only do this before legalization to avoid conflicting with target-specific
5894   // transforms in the other direction (create a select from a zext/sext). There
5895   // is also a target-independent combine here in DAGCombiner in the other
5896   // direction for (select Cond, -1, 0) when the condition is not i1.
5897   if (CondVT == MVT::i1 && !LegalOperations) {
5898     if (C1->isNullValue() && C2->isOne()) {
5899       // select Cond, 0, 1 --> zext (!Cond)
5900       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
5901       if (VT != MVT::i1)
5902         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
5903       return NotCond;
5904     }
5905     if (C1->isNullValue() && C2->isAllOnesValue()) {
5906       // select Cond, 0, -1 --> sext (!Cond)
5907       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
5908       if (VT != MVT::i1)
5909         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
5910       return NotCond;
5911     }
5912     if (C1->isOne() && C2->isNullValue()) {
5913       // select Cond, 1, 0 --> zext (Cond)
5914       if (VT != MVT::i1)
5915         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
5916       return Cond;
5917     }
5918     if (C1->isAllOnesValue() && C2->isNullValue()) {
5919       // select Cond, -1, 0 --> sext (Cond)
5920       if (VT != MVT::i1)
5921         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
5922       return Cond;
5923     }
5924 
5925     // For any constants that differ by 1, we can transform the select into an
5926     // extend and add. Use a target hook because some targets may prefer to
5927     // transform in the other direction.
5928     if (TLI.convertSelectOfConstantsToMath()) {
5929       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
5930         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
5931         if (VT != MVT::i1)
5932           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
5933         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
5934       }
5935       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
5936         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
5937         if (VT != MVT::i1)
5938           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
5939         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
5940       }
5941     }
5942 
5943     return SDValue();
5944   }
5945 
5946   // fold (select Cond, 0, 1) -> (xor Cond, 1)
5947   // We can't do this reliably if integer based booleans have different contents
5948   // to floating point based booleans. This is because we can't tell whether we
5949   // have an integer-based boolean or a floating-point-based boolean unless we
5950   // can find the SETCC that produced it and inspect its operands. This is
5951   // fairly easy if C is the SETCC node, but it can potentially be
5952   // undiscoverable (or not reasonably discoverable). For example, it could be
5953   // in another basic block or it could require searching a complicated
5954   // expression.
5955   if (CondVT.isInteger() &&
5956       TLI.getBooleanContents(false, true) ==
5957           TargetLowering::ZeroOrOneBooleanContent &&
5958       TLI.getBooleanContents(false, false) ==
5959           TargetLowering::ZeroOrOneBooleanContent &&
5960       C1->isNullValue() && C2->isOne()) {
5961     SDValue NotCond =
5962         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
5963     if (VT.bitsEq(CondVT))
5964       return NotCond;
5965     return DAG.getZExtOrTrunc(NotCond, DL, VT);
5966   }
5967 
5968   return SDValue();
5969 }
5970 
5971 SDValue DAGCombiner::visitSELECT(SDNode *N) {
5972   SDValue N0 = N->getOperand(0);
5973   SDValue N1 = N->getOperand(1);
5974   SDValue N2 = N->getOperand(2);
5975   EVT VT = N->getValueType(0);
5976   EVT VT0 = N0.getValueType();
5977 
5978   // fold (select C, X, X) -> X
5979   if (N1 == N2)
5980     return N1;
5981   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
5982     // fold (select true, X, Y) -> X
5983     // fold (select false, X, Y) -> Y
5984     return !N0C->isNullValue() ? N1 : N2;
5985   }
5986   // fold (select X, X, Y) -> (or X, Y)
5987   // fold (select X, 1, Y) -> (or C, Y)
5988   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
5989     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5990 
5991   if (SDValue V = foldSelectOfConstants(N))
5992     return V;
5993 
5994   // fold (select C, 0, X) -> (and (not C), X)
5995   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
5996     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5997     AddToWorklist(NOTNode.getNode());
5998     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
5999   }
6000   // fold (select C, X, 1) -> (or (not C), X)
6001   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
6002     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
6003     AddToWorklist(NOTNode.getNode());
6004     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
6005   }
6006   // fold (select X, Y, X) -> (and X, Y)
6007   // fold (select X, Y, 0) -> (and X, Y)
6008   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
6009     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
6010 
6011   // If we can fold this based on the true/false value, do so.
6012   if (SimplifySelectOps(N, N1, N2))
6013     return SDValue(N, 0);  // Don't revisit N.
6014 
6015   if (VT0 == MVT::i1) {
6016     // The code in this block deals with the following 2 equivalences:
6017     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
6018     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
6019     // The target can specify its preferred form with the
6020     // shouldNormalizeToSelectSequence() callback. However we always transform
6021     // to the right anyway if we find the inner select exists in the DAG anyway
6022     // and we always transform to the left side if we know that we can further
6023     // optimize the combination of the conditions.
6024     bool normalizeToSequence
6025       = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
6026     // select (and Cond0, Cond1), X, Y
6027     //   -> select Cond0, (select Cond1, X, Y), Y
6028     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
6029       SDValue Cond0 = N0->getOperand(0);
6030       SDValue Cond1 = N0->getOperand(1);
6031       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
6032                                         N1.getValueType(), Cond1, N1, N2);
6033       if (normalizeToSequence || !InnerSelect.use_empty())
6034         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
6035                            InnerSelect, N2);
6036     }
6037     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
6038     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
6039       SDValue Cond0 = N0->getOperand(0);
6040       SDValue Cond1 = N0->getOperand(1);
6041       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
6042                                         N1.getValueType(), Cond1, N1, N2);
6043       if (normalizeToSequence || !InnerSelect.use_empty())
6044         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
6045                            InnerSelect);
6046     }
6047 
6048     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
6049     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
6050       SDValue N1_0 = N1->getOperand(0);
6051       SDValue N1_1 = N1->getOperand(1);
6052       SDValue N1_2 = N1->getOperand(2);
6053       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
6054         // Create the actual and node if we can generate good code for it.
6055         if (!normalizeToSequence) {
6056           SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
6057                                     N0, N1_0);
6058           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
6059                              N1_1, N2);
6060         }
6061         // Otherwise see if we can optimize the "and" to a better pattern.
6062         if (SDValue Combined = visitANDLike(N0, N1_0, N))
6063           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
6064                              N1_1, N2);
6065       }
6066     }
6067     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
6068     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
6069       SDValue N2_0 = N2->getOperand(0);
6070       SDValue N2_1 = N2->getOperand(1);
6071       SDValue N2_2 = N2->getOperand(2);
6072       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
6073         // Create the actual or node if we can generate good code for it.
6074         if (!normalizeToSequence) {
6075           SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
6076                                    N0, N2_0);
6077           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
6078                              N1, N2_2);
6079         }
6080         // Otherwise see if we can optimize to a better pattern.
6081         if (SDValue Combined = visitORLike(N0, N2_0, N))
6082           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
6083                              N1, N2_2);
6084       }
6085     }
6086   }
6087 
6088   // select (xor Cond, 1), X, Y -> select Cond, Y, X
6089   if (VT0 == MVT::i1) {
6090     if (N0->getOpcode() == ISD::XOR) {
6091       if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
6092         SDValue Cond0 = N0->getOperand(0);
6093         if (C->isOne())
6094           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
6095                              Cond0, N2, N1);
6096       }
6097     }
6098   }
6099 
6100   // fold selects based on a setcc into other things, such as min/max/abs
6101   if (N0.getOpcode() == ISD::SETCC) {
6102     // select x, y (fcmp lt x, y) -> fminnum x, y
6103     // select x, y (fcmp gt x, y) -> fmaxnum x, y
6104     //
6105     // This is OK if we don't care about what happens if either operand is a
6106     // NaN.
6107     //
6108 
6109     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
6110     // no signed zeros as well as no nans.
6111     const TargetOptions &Options = DAG.getTarget().Options;
6112     if (Options.UnsafeFPMath &&
6113         VT.isFloatingPoint() && N0.hasOneUse() &&
6114         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
6115       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6116 
6117       if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
6118                                                 N0.getOperand(1), N1, N2, CC,
6119                                                 TLI, DAG))
6120         return FMinMax;
6121     }
6122 
6123     if ((!LegalOperations &&
6124          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
6125         TLI.isOperationLegal(ISD::SELECT_CC, VT))
6126       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
6127                          N0.getOperand(0), N0.getOperand(1),
6128                          N1, N2, N0.getOperand(2));
6129     return SimplifySelect(SDLoc(N), N0, N1, N2);
6130   }
6131 
6132   return SDValue();
6133 }
6134 
6135 static
6136 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6137   SDLoc DL(N);
6138   EVT LoVT, HiVT;
6139   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6140 
6141   // Split the inputs.
6142   SDValue Lo, Hi, LL, LH, RL, RH;
6143   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6144   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6145 
6146   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6147   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6148 
6149   return std::make_pair(Lo, Hi);
6150 }
6151 
6152 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6153 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6154 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6155   SDLoc DL(N);
6156   SDValue Cond = N->getOperand(0);
6157   SDValue LHS = N->getOperand(1);
6158   SDValue RHS = N->getOperand(2);
6159   EVT VT = N->getValueType(0);
6160   int NumElems = VT.getVectorNumElements();
6161   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6162          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6163          Cond.getOpcode() == ISD::BUILD_VECTOR);
6164 
6165   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6166   // binary ones here.
6167   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6168     return SDValue();
6169 
6170   // We're sure we have an even number of elements due to the
6171   // concat_vectors we have as arguments to vselect.
6172   // Skip BV elements until we find one that's not an UNDEF
6173   // After we find an UNDEF element, keep looping until we get to half the
6174   // length of the BV and see if all the non-undef nodes are the same.
6175   ConstantSDNode *BottomHalf = nullptr;
6176   for (int i = 0; i < NumElems / 2; ++i) {
6177     if (Cond->getOperand(i)->isUndef())
6178       continue;
6179 
6180     if (BottomHalf == nullptr)
6181       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6182     else if (Cond->getOperand(i).getNode() != BottomHalf)
6183       return SDValue();
6184   }
6185 
6186   // Do the same for the second half of the BuildVector
6187   ConstantSDNode *TopHalf = nullptr;
6188   for (int i = NumElems / 2; i < NumElems; ++i) {
6189     if (Cond->getOperand(i)->isUndef())
6190       continue;
6191 
6192     if (TopHalf == nullptr)
6193       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6194     else if (Cond->getOperand(i).getNode() != TopHalf)
6195       return SDValue();
6196   }
6197 
6198   assert(TopHalf && BottomHalf &&
6199          "One half of the selector was all UNDEFs and the other was all the "
6200          "same value. This should have been addressed before this function.");
6201   return DAG.getNode(
6202       ISD::CONCAT_VECTORS, DL, VT,
6203       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6204       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6205 }
6206 
6207 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
6208 
6209   if (Level >= AfterLegalizeTypes)
6210     return SDValue();
6211 
6212   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
6213   SDValue Mask = MSC->getMask();
6214   SDValue Data  = MSC->getValue();
6215   SDLoc DL(N);
6216 
6217   // If the MSCATTER data type requires splitting and the mask is provided by a
6218   // SETCC, then split both nodes and its operands before legalization. This
6219   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6220   // and enables future optimizations (e.g. min/max pattern matching on X86).
6221   if (Mask.getOpcode() != ISD::SETCC)
6222     return SDValue();
6223 
6224   // Check if any splitting is required.
6225   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
6226       TargetLowering::TypeSplitVector)
6227     return SDValue();
6228   SDValue MaskLo, MaskHi, Lo, Hi;
6229   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6230 
6231   EVT LoVT, HiVT;
6232   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
6233 
6234   SDValue Chain = MSC->getChain();
6235 
6236   EVT MemoryVT = MSC->getMemoryVT();
6237   unsigned Alignment = MSC->getOriginalAlignment();
6238 
6239   EVT LoMemVT, HiMemVT;
6240   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6241 
6242   SDValue DataLo, DataHi;
6243   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6244 
6245   SDValue BasePtr = MSC->getBasePtr();
6246   SDValue IndexLo, IndexHi;
6247   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
6248 
6249   MachineMemOperand *MMO = DAG.getMachineFunction().
6250     getMachineMemOperand(MSC->getPointerInfo(),
6251                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6252                           Alignment, MSC->getAAInfo(), MSC->getRanges());
6253 
6254   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
6255   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
6256                             DL, OpsLo, MMO);
6257 
6258   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
6259   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
6260                             DL, OpsHi, MMO);
6261 
6262   AddToWorklist(Lo.getNode());
6263   AddToWorklist(Hi.getNode());
6264 
6265   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6266 }
6267 
6268 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6269 
6270   if (Level >= AfterLegalizeTypes)
6271     return SDValue();
6272 
6273   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6274   SDValue Mask = MST->getMask();
6275   SDValue Data  = MST->getValue();
6276   EVT VT = Data.getValueType();
6277   SDLoc DL(N);
6278 
6279   // If the MSTORE data type requires splitting and the mask is provided by a
6280   // SETCC, then split both nodes and its operands before legalization. This
6281   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6282   // and enables future optimizations (e.g. min/max pattern matching on X86).
6283   if (Mask.getOpcode() == ISD::SETCC) {
6284 
6285     // Check if any splitting is required.
6286     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6287         TargetLowering::TypeSplitVector)
6288       return SDValue();
6289 
6290     SDValue MaskLo, MaskHi, Lo, Hi;
6291     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6292 
6293     SDValue Chain = MST->getChain();
6294     SDValue Ptr   = MST->getBasePtr();
6295 
6296     EVT MemoryVT = MST->getMemoryVT();
6297     unsigned Alignment = MST->getOriginalAlignment();
6298 
6299     // if Alignment is equal to the vector size,
6300     // take the half of it for the second part
6301     unsigned SecondHalfAlignment =
6302       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6303 
6304     EVT LoMemVT, HiMemVT;
6305     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6306 
6307     SDValue DataLo, DataHi;
6308     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6309 
6310     MachineMemOperand *MMO = DAG.getMachineFunction().
6311       getMachineMemOperand(MST->getPointerInfo(),
6312                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6313                            Alignment, MST->getAAInfo(), MST->getRanges());
6314 
6315     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6316                             MST->isTruncatingStore(),
6317                             MST->isCompressingStore());
6318 
6319     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6320                                      MST->isCompressingStore());
6321 
6322     MMO = DAG.getMachineFunction().
6323       getMachineMemOperand(MST->getPointerInfo(),
6324                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
6325                            SecondHalfAlignment, MST->getAAInfo(),
6326                            MST->getRanges());
6327 
6328     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6329                             MST->isTruncatingStore(),
6330                             MST->isCompressingStore());
6331 
6332     AddToWorklist(Lo.getNode());
6333     AddToWorklist(Hi.getNode());
6334 
6335     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6336   }
6337   return SDValue();
6338 }
6339 
6340 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
6341 
6342   if (Level >= AfterLegalizeTypes)
6343     return SDValue();
6344 
6345   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
6346   SDValue Mask = MGT->getMask();
6347   SDLoc DL(N);
6348 
6349   // If the MGATHER result requires splitting and the mask is provided by a
6350   // SETCC, then split both nodes and its operands before legalization. This
6351   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6352   // and enables future optimizations (e.g. min/max pattern matching on X86).
6353 
6354   if (Mask.getOpcode() != ISD::SETCC)
6355     return SDValue();
6356 
6357   EVT VT = N->getValueType(0);
6358 
6359   // Check if any splitting is required.
6360   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6361       TargetLowering::TypeSplitVector)
6362     return SDValue();
6363 
6364   SDValue MaskLo, MaskHi, Lo, Hi;
6365   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6366 
6367   SDValue Src0 = MGT->getValue();
6368   SDValue Src0Lo, Src0Hi;
6369   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6370 
6371   EVT LoVT, HiVT;
6372   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6373 
6374   SDValue Chain = MGT->getChain();
6375   EVT MemoryVT = MGT->getMemoryVT();
6376   unsigned Alignment = MGT->getOriginalAlignment();
6377 
6378   EVT LoMemVT, HiMemVT;
6379   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6380 
6381   SDValue BasePtr = MGT->getBasePtr();
6382   SDValue Index = MGT->getIndex();
6383   SDValue IndexLo, IndexHi;
6384   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
6385 
6386   MachineMemOperand *MMO = DAG.getMachineFunction().
6387     getMachineMemOperand(MGT->getPointerInfo(),
6388                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6389                           Alignment, MGT->getAAInfo(), MGT->getRanges());
6390 
6391   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
6392   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
6393                             MMO);
6394 
6395   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
6396   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
6397                             MMO);
6398 
6399   AddToWorklist(Lo.getNode());
6400   AddToWorklist(Hi.getNode());
6401 
6402   // Build a factor node to remember that this load is independent of the
6403   // other one.
6404   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6405                       Hi.getValue(1));
6406 
6407   // Legalized the chain result - switch anything that used the old chain to
6408   // use the new one.
6409   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
6410 
6411   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6412 
6413   SDValue RetOps[] = { GatherRes, Chain };
6414   return DAG.getMergeValues(RetOps, DL);
6415 }
6416 
6417 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6418 
6419   if (Level >= AfterLegalizeTypes)
6420     return SDValue();
6421 
6422   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6423   SDValue Mask = MLD->getMask();
6424   SDLoc DL(N);
6425 
6426   // If the MLOAD result requires splitting and the mask is provided by a
6427   // SETCC, then split both nodes and its operands before legalization. This
6428   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6429   // and enables future optimizations (e.g. min/max pattern matching on X86).
6430 
6431   if (Mask.getOpcode() == ISD::SETCC) {
6432     EVT VT = N->getValueType(0);
6433 
6434     // Check if any splitting is required.
6435     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6436         TargetLowering::TypeSplitVector)
6437       return SDValue();
6438 
6439     SDValue MaskLo, MaskHi, Lo, Hi;
6440     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6441 
6442     SDValue Src0 = MLD->getSrc0();
6443     SDValue Src0Lo, Src0Hi;
6444     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6445 
6446     EVT LoVT, HiVT;
6447     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6448 
6449     SDValue Chain = MLD->getChain();
6450     SDValue Ptr   = MLD->getBasePtr();
6451     EVT MemoryVT = MLD->getMemoryVT();
6452     unsigned Alignment = MLD->getOriginalAlignment();
6453 
6454     // if Alignment is equal to the vector size,
6455     // take the half of it for the second part
6456     unsigned SecondHalfAlignment =
6457       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6458          Alignment/2 : Alignment;
6459 
6460     EVT LoMemVT, HiMemVT;
6461     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6462 
6463     MachineMemOperand *MMO = DAG.getMachineFunction().
6464     getMachineMemOperand(MLD->getPointerInfo(),
6465                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6466                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6467 
6468     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6469                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6470 
6471     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6472                                      MLD->isExpandingLoad());
6473 
6474     MMO = DAG.getMachineFunction().
6475     getMachineMemOperand(MLD->getPointerInfo(),
6476                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
6477                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
6478 
6479     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6480                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6481 
6482     AddToWorklist(Lo.getNode());
6483     AddToWorklist(Hi.getNode());
6484 
6485     // Build a factor node to remember that this load is independent of the
6486     // other one.
6487     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6488                         Hi.getValue(1));
6489 
6490     // Legalized the chain result - switch anything that used the old chain to
6491     // use the new one.
6492     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6493 
6494     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6495 
6496     SDValue RetOps[] = { LoadRes, Chain };
6497     return DAG.getMergeValues(RetOps, DL);
6498   }
6499   return SDValue();
6500 }
6501 
6502 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
6503   SDValue N0 = N->getOperand(0);
6504   SDValue N1 = N->getOperand(1);
6505   SDValue N2 = N->getOperand(2);
6506   SDLoc DL(N);
6507 
6508   // fold (vselect C, X, X) -> X
6509   if (N1 == N2)
6510     return N1;
6511 
6512   // Canonicalize integer abs.
6513   // vselect (setg[te] X,  0),  X, -X ->
6514   // vselect (setgt    X, -1),  X, -X ->
6515   // vselect (setl[te] X,  0), -X,  X ->
6516   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
6517   if (N0.getOpcode() == ISD::SETCC) {
6518     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6519     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6520     bool isAbs = false;
6521     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
6522 
6523     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
6524          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
6525         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
6526       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
6527     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
6528              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
6529       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6530 
6531     if (isAbs) {
6532       EVT VT = LHS.getValueType();
6533       SDValue Shift = DAG.getNode(
6534           ISD::SRA, DL, VT, LHS,
6535           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
6536       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
6537       AddToWorklist(Shift.getNode());
6538       AddToWorklist(Add.getNode());
6539       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
6540     }
6541   }
6542 
6543   if (SimplifySelectOps(N, N1, N2))
6544     return SDValue(N, 0);  // Don't revisit N.
6545 
6546   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
6547   if (ISD::isBuildVectorAllOnes(N0.getNode()))
6548     return N1;
6549   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
6550   if (ISD::isBuildVectorAllZeros(N0.getNode()))
6551     return N2;
6552 
6553   // The ConvertSelectToConcatVector function is assuming both the above
6554   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
6555   // and addressed.
6556   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
6557       N2.getOpcode() == ISD::CONCAT_VECTORS &&
6558       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
6559     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
6560       return CV;
6561   }
6562 
6563   return SDValue();
6564 }
6565 
6566 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
6567   SDValue N0 = N->getOperand(0);
6568   SDValue N1 = N->getOperand(1);
6569   SDValue N2 = N->getOperand(2);
6570   SDValue N3 = N->getOperand(3);
6571   SDValue N4 = N->getOperand(4);
6572   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
6573 
6574   // fold select_cc lhs, rhs, x, x, cc -> x
6575   if (N2 == N3)
6576     return N2;
6577 
6578   // Determine if the condition we're dealing with is constant
6579   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
6580                                   CC, SDLoc(N), false)) {
6581     AddToWorklist(SCC.getNode());
6582 
6583     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
6584       if (!SCCC->isNullValue())
6585         return N2;    // cond always true -> true val
6586       else
6587         return N3;    // cond always false -> false val
6588     } else if (SCC->isUndef()) {
6589       // When the condition is UNDEF, just return the first operand. This is
6590       // coherent the DAG creation, no setcc node is created in this case
6591       return N2;
6592     } else if (SCC.getOpcode() == ISD::SETCC) {
6593       // Fold to a simpler select_cc
6594       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
6595                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
6596                          SCC.getOperand(2));
6597     }
6598   }
6599 
6600   // If we can fold this based on the true/false value, do so.
6601   if (SimplifySelectOps(N, N2, N3))
6602     return SDValue(N, 0);  // Don't revisit N.
6603 
6604   // fold select_cc into other things, such as min/max/abs
6605   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
6606 }
6607 
6608 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6609   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6610                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
6611                        SDLoc(N));
6612 }
6613 
6614 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6615   SDValue LHS = N->getOperand(0);
6616   SDValue RHS = N->getOperand(1);
6617   SDValue Carry = N->getOperand(2);
6618   SDValue Cond = N->getOperand(3);
6619 
6620   // If Carry is false, fold to a regular SETCC.
6621   if (Carry.getOpcode() == ISD::CARRY_FALSE)
6622     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6623 
6624   return SDValue();
6625 }
6626 
6627 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6628 /// a build_vector of constants.
6629 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6630 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6631 /// Vector extends are not folded if operations are legal; this is to
6632 /// avoid introducing illegal build_vector dag nodes.
6633 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6634                                          SelectionDAG &DAG, bool LegalTypes,
6635                                          bool LegalOperations) {
6636   unsigned Opcode = N->getOpcode();
6637   SDValue N0 = N->getOperand(0);
6638   EVT VT = N->getValueType(0);
6639 
6640   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6641          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6642          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
6643          && "Expected EXTEND dag node in input!");
6644 
6645   // fold (sext c1) -> c1
6646   // fold (zext c1) -> c1
6647   // fold (aext c1) -> c1
6648   if (isa<ConstantSDNode>(N0))
6649     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6650 
6651   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
6652   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
6653   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
6654   EVT SVT = VT.getScalarType();
6655   if (!(VT.isVector() &&
6656       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6657       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6658     return nullptr;
6659 
6660   // We can fold this node into a build_vector.
6661   unsigned VTBits = SVT.getSizeInBits();
6662   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6663   SmallVector<SDValue, 8> Elts;
6664   unsigned NumElts = VT.getVectorNumElements();
6665   SDLoc DL(N);
6666 
6667   for (unsigned i=0; i != NumElts; ++i) {
6668     SDValue Op = N0->getOperand(i);
6669     if (Op->isUndef()) {
6670       Elts.push_back(DAG.getUNDEF(SVT));
6671       continue;
6672     }
6673 
6674     SDLoc DL(Op);
6675     // Get the constant value and if needed trunc it to the size of the type.
6676     // Nodes like build_vector might have constants wider than the scalar type.
6677     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
6678     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
6679       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
6680     else
6681       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
6682   }
6683 
6684   return DAG.getBuildVector(VT, DL, Elts).getNode();
6685 }
6686 
6687 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
6688 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
6689 // transformation. Returns true if extension are possible and the above
6690 // mentioned transformation is profitable.
6691 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
6692                                     unsigned ExtOpc,
6693                                     SmallVectorImpl<SDNode *> &ExtendNodes,
6694                                     const TargetLowering &TLI) {
6695   bool HasCopyToRegUses = false;
6696   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
6697   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
6698                             UE = N0.getNode()->use_end();
6699        UI != UE; ++UI) {
6700     SDNode *User = *UI;
6701     if (User == N)
6702       continue;
6703     if (UI.getUse().getResNo() != N0.getResNo())
6704       continue;
6705     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
6706     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
6707       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
6708       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6709         // Sign bits will be lost after a zext.
6710         return false;
6711       bool Add = false;
6712       for (unsigned i = 0; i != 2; ++i) {
6713         SDValue UseOp = User->getOperand(i);
6714         if (UseOp == N0)
6715           continue;
6716         if (!isa<ConstantSDNode>(UseOp))
6717           return false;
6718         Add = true;
6719       }
6720       if (Add)
6721         ExtendNodes.push_back(User);
6722       continue;
6723     }
6724     // If truncates aren't free and there are users we can't
6725     // extend, it isn't worthwhile.
6726     if (!isTruncFree)
6727       return false;
6728     // Remember if this value is live-out.
6729     if (User->getOpcode() == ISD::CopyToReg)
6730       HasCopyToRegUses = true;
6731   }
6732 
6733   if (HasCopyToRegUses) {
6734     bool BothLiveOut = false;
6735     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6736          UI != UE; ++UI) {
6737       SDUse &Use = UI.getUse();
6738       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6739         BothLiveOut = true;
6740         break;
6741       }
6742     }
6743     if (BothLiveOut)
6744       // Both unextended and extended values are live out. There had better be
6745       // a good reason for the transformation.
6746       return ExtendNodes.size();
6747   }
6748   return true;
6749 }
6750 
6751 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6752                                   SDValue Trunc, SDValue ExtLoad,
6753                                   const SDLoc &DL, ISD::NodeType ExtType) {
6754   // Extend SetCC uses if necessary.
6755   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6756     SDNode *SetCC = SetCCs[i];
6757     SmallVector<SDValue, 4> Ops;
6758 
6759     for (unsigned j = 0; j != 2; ++j) {
6760       SDValue SOp = SetCC->getOperand(j);
6761       if (SOp == Trunc)
6762         Ops.push_back(ExtLoad);
6763       else
6764         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6765     }
6766 
6767     Ops.push_back(SetCC->getOperand(2));
6768     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6769   }
6770 }
6771 
6772 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
6773 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
6774   SDValue N0 = N->getOperand(0);
6775   EVT DstVT = N->getValueType(0);
6776   EVT SrcVT = N0.getValueType();
6777 
6778   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
6779           N->getOpcode() == ISD::ZERO_EXTEND) &&
6780          "Unexpected node type (not an extend)!");
6781 
6782   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
6783   // For example, on a target with legal v4i32, but illegal v8i32, turn:
6784   //   (v8i32 (sext (v8i16 (load x))))
6785   // into:
6786   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
6787   //                          (v4i32 (sextload (x + 16)))))
6788   // Where uses of the original load, i.e.:
6789   //   (v8i16 (load x))
6790   // are replaced with:
6791   //   (v8i16 (truncate
6792   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
6793   //                            (v4i32 (sextload (x + 16)))))))
6794   //
6795   // This combine is only applicable to illegal, but splittable, vectors.
6796   // All legal types, and illegal non-vector types, are handled elsewhere.
6797   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
6798   //
6799   if (N0->getOpcode() != ISD::LOAD)
6800     return SDValue();
6801 
6802   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6803 
6804   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
6805       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
6806       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
6807     return SDValue();
6808 
6809   SmallVector<SDNode *, 4> SetCCs;
6810   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
6811     return SDValue();
6812 
6813   ISD::LoadExtType ExtType =
6814       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
6815 
6816   // Try to split the vector types to get down to legal types.
6817   EVT SplitSrcVT = SrcVT;
6818   EVT SplitDstVT = DstVT;
6819   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
6820          SplitSrcVT.getVectorNumElements() > 1) {
6821     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
6822     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
6823   }
6824 
6825   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
6826     return SDValue();
6827 
6828   SDLoc DL(N);
6829   const unsigned NumSplits =
6830       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
6831   const unsigned Stride = SplitSrcVT.getStoreSize();
6832   SmallVector<SDValue, 4> Loads;
6833   SmallVector<SDValue, 4> Chains;
6834 
6835   SDValue BasePtr = LN0->getBasePtr();
6836   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
6837     const unsigned Offset = Idx * Stride;
6838     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
6839 
6840     SDValue SplitLoad = DAG.getExtLoad(
6841         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
6842         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
6843         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
6844 
6845     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
6846                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
6847 
6848     Loads.push_back(SplitLoad.getValue(0));
6849     Chains.push_back(SplitLoad.getValue(1));
6850   }
6851 
6852   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
6853   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
6854 
6855   // Simplify TF.
6856   AddToWorklist(NewChain.getNode());
6857 
6858   CombineTo(N, NewValue);
6859 
6860   // Replace uses of the original load (before extension)
6861   // with a truncate of the concatenated sextloaded vectors.
6862   SDValue Trunc =
6863       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
6864   CombineTo(N0.getNode(), Trunc, NewChain);
6865   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
6866                   (ISD::NodeType)N->getOpcode());
6867   return SDValue(N, 0); // Return N so it doesn't get rechecked!
6868 }
6869 
// visitSIGN_EXTEND - Try to simplify the SIGN_EXTEND node N. Returns the
// replacement value, SDValue(N, 0) when N has already been replaced in place
// through CombineTo, or a null SDValue when no fold applied.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // Captured once here; the folds below reuse DL even after CombineTo(N, ...).
  SDLoc DL(N);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      // Grab the truncate's operand before the truncate is replaced.
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // First bring Op to the destination type, then sign extend in register
      // from the truncated (middle) type.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    // SETCC users of the load that have to be widened if the fold fires.
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the narrow load see a truncate of the wide load;
      // users of the load's chain result get the new load's chain.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    // The new load keeps the original in-memory type.
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    // A zero-extending load cannot be turned into a sign-extending one here.
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign extend the constant operand to the width of VT.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        // Despite its name, And holds whichever of and/or/xor N0 is.
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
                                           : TLI.getConstTrueVal(DAG, VT, DL);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  // No fold applied.
  return SDValue();
}
7095 
7096 // isTruncateOf - If N is a truncate of some other value, return true, record
7097 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
7098 // This function computes KnownZero to avoid a duplicated call to
7099 // computeKnownBits in the caller.
7100 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7101                          APInt &KnownZero) {
7102   APInt KnownOne;
7103   if (N->getOpcode() == ISD::TRUNCATE) {
7104     Op = N->getOperand(0);
7105     DAG.computeKnownBits(Op, KnownZero, KnownOne);
7106     return true;
7107   }
7108 
7109   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7110       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7111     return false;
7112 
7113   SDValue Op0 = N->getOperand(0);
7114   SDValue Op1 = N->getOperand(1);
7115   assert(Op0.getValueType() == Op1.getValueType());
7116 
7117   if (isNullConstant(Op0))
7118     Op = Op1;
7119   else if (isNullConstant(Op1))
7120     Op = Op0;
7121   else
7122     return false;
7123 
7124   DAG.computeKnownBits(Op, KnownZero, KnownOne);
7125 
7126   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
7127     return false;
7128 
7129   return true;
7130 }
7131 
7132 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
7133   SDValue N0 = N->getOperand(0);
7134   EVT VT = N->getValueType(0);
7135 
7136   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7137                                               LegalOperations))
7138     return SDValue(Res, 0);
7139 
7140   // fold (zext (zext x)) -> (zext x)
7141   // fold (zext (aext x)) -> (zext x)
7142   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
7143     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
7144                        N0.getOperand(0));
7145 
7146   // fold (zext (truncate x)) -> (zext x) or
7147   //      (zext (truncate x)) -> (truncate x)
7148   // This is valid when the truncated bits of x are already zero.
7149   // FIXME: We should extend this to work for vectors too.
7150   SDValue Op;
7151   APInt KnownZero;
7152   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
7153     APInt TruncatedBits =
7154       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
7155       APInt(Op.getValueSizeInBits(), 0) :
7156       APInt::getBitsSet(Op.getValueSizeInBits(),
7157                         N0.getValueSizeInBits(),
7158                         std::min(Op.getValueSizeInBits(),
7159                                  VT.getSizeInBits()));
7160     if (TruncatedBits == (KnownZero & TruncatedBits)) {
7161       if (VT.bitsGT(Op.getValueType()))
7162         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
7163       if (VT.bitsLT(Op.getValueType()))
7164         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
7165 
7166       return Op;
7167     }
7168   }
7169 
7170   // fold (zext (truncate (load x))) -> (zext (smaller load x))
7171   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
7172   if (N0.getOpcode() == ISD::TRUNCATE) {
7173     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7174       SDNode *oye = N0.getOperand(0).getNode();
7175       if (NarrowLoad.getNode() != N0.getNode()) {
7176         CombineTo(N0.getNode(), NarrowLoad);
7177         // CombineTo deleted the truncate, if needed, but not what's under it.
7178         AddToWorklist(oye);
7179       }
7180       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7181     }
7182   }
7183 
7184   // fold (zext (truncate x)) -> (and x, mask)
7185   if (N0.getOpcode() == ISD::TRUNCATE) {
7186     // fold (zext (truncate (load x))) -> (zext (smaller load x))
7187     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
7188     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7189       SDNode *oye = N0.getOperand(0).getNode();
7190       if (NarrowLoad.getNode() != N0.getNode()) {
7191         CombineTo(N0.getNode(), NarrowLoad);
7192         // CombineTo deleted the truncate, if needed, but not what's under it.
7193         AddToWorklist(oye);
7194       }
7195       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7196     }
7197 
7198     EVT SrcVT = N0.getOperand(0).getValueType();
7199     EVT MinVT = N0.getValueType();
7200 
7201     // Try to mask before the extension to avoid having to generate a larger mask,
7202     // possibly over several sub-vectors.
7203     if (SrcVT.bitsLT(VT)) {
7204       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
7205                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
7206         SDValue Op = N0.getOperand(0);
7207         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7208         AddToWorklist(Op.getNode());
7209         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
7210       }
7211     }
7212 
7213     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
7214       SDValue Op = N0.getOperand(0);
7215       if (SrcVT.bitsLT(VT)) {
7216         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
7217         AddToWorklist(Op.getNode());
7218       } else if (SrcVT.bitsGT(VT)) {
7219         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
7220         AddToWorklist(Op.getNode());
7221       }
7222       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
7223     }
7224   }
7225 
7226   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
7227   // if either of the casts is not free.
7228   if (N0.getOpcode() == ISD::AND &&
7229       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7230       N0.getOperand(1).getOpcode() == ISD::Constant &&
7231       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7232                            N0.getValueType()) ||
7233        !TLI.isZExtFree(N0.getValueType(), VT))) {
7234     SDValue X = N0.getOperand(0).getOperand(0);
7235     if (X.getValueType().bitsLT(VT)) {
7236       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
7237     } else if (X.getValueType().bitsGT(VT)) {
7238       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7239     }
7240     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7241     Mask = Mask.zext(VT.getSizeInBits());
7242     SDLoc DL(N);
7243     return DAG.getNode(ISD::AND, DL, VT,
7244                        X, DAG.getConstant(Mask, DL, VT));
7245   }
7246 
7247   // fold (zext (load x)) -> (zext (truncate (zextload x)))
7248   // Only generate vector extloads when 1) they're legal, and 2) they are
7249   // deemed desirable by the target.
7250   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7251       ((!LegalOperations && !VT.isVector() &&
7252         !cast<LoadSDNode>(N0)->isVolatile()) ||
7253        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
7254     bool DoXform = true;
7255     SmallVector<SDNode*, 4> SetCCs;
7256     if (!N0.hasOneUse())
7257       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
7258     if (VT.isVector())
7259       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
7260     if (DoXform) {
7261       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7262       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7263                                        LN0->getChain(),
7264                                        LN0->getBasePtr(), N0.getValueType(),
7265                                        LN0->getMemOperand());
7266 
7267       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7268                                   N0.getValueType(), ExtLoad);
7269       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7270 
7271       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
7272                       ISD::ZERO_EXTEND);
7273       CombineTo(N, ExtLoad);
7274       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7275     }
7276   }
7277 
7278   // fold (zext (load x)) to multiple smaller zextloads.
7279   // Only on illegal but splittable vectors.
7280   if (SDValue ExtLoad = CombineExtLoad(N))
7281     return ExtLoad;
7282 
7283   // fold (zext (and/or/xor (load x), cst)) ->
7284   //      (and/or/xor (zextload x), (zext cst))
7285   // Unless (and (load x) cst) will match as a zextload already and has
7286   // additional users.
7287   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
7288        N0.getOpcode() == ISD::XOR) &&
7289       isa<LoadSDNode>(N0.getOperand(0)) &&
7290       N0.getOperand(1).getOpcode() == ISD::Constant &&
7291       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
7292       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
7293     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
7294     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
7295       bool DoXform = true;
7296       SmallVector<SDNode*, 4> SetCCs;
7297       if (!N0.hasOneUse()) {
7298         if (N0.getOpcode() == ISD::AND) {
7299           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
7300           auto NarrowLoad = false;
7301           EVT LoadResultTy = AndC->getValueType(0);
7302           EVT ExtVT, LoadedVT;
7303           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
7304                                NarrowLoad))
7305             DoXform = false;
7306         }
7307         if (DoXform)
7308           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
7309                                             ISD::ZERO_EXTEND, SetCCs, TLI);
7310       }
7311       if (DoXform) {
7312         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
7313                                          LN0->getChain(), LN0->getBasePtr(),
7314                                          LN0->getMemoryVT(),
7315                                          LN0->getMemOperand());
7316         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7317         Mask = Mask.zext(VT.getSizeInBits());
7318         SDLoc DL(N);
7319         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
7320                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
7321         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
7322                                     SDLoc(N0.getOperand(0)),
7323                                     N0.getOperand(0).getValueType(), ExtLoad);
7324         CombineTo(N, And);
7325         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
7326         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
7327                         ISD::ZERO_EXTEND);
7328         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7329       }
7330     }
7331   }
7332 
7333   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
7334   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
7335   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
7336       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
7337     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7338     EVT MemVT = LN0->getMemoryVT();
7339     if ((!LegalOperations && !LN0->isVolatile()) ||
7340         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7341       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
7342                                        LN0->getChain(),
7343                                        LN0->getBasePtr(), MemVT,
7344                                        LN0->getMemOperand());
7345       CombineTo(N, ExtLoad);
7346       CombineTo(N0.getNode(),
7347                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
7348                             ExtLoad),
7349                 ExtLoad.getValue(1));
7350       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7351     }
7352   }
7353 
7354   if (N0.getOpcode() == ISD::SETCC) {
7355     // Only do this before legalize for now.
7356     if (!LegalOperations && VT.isVector() &&
7357         N0.getValueType().getVectorElementType() == MVT::i1) {
7358       EVT N00VT = N0.getOperand(0).getValueType();
7359       if (getSetCCResultType(N00VT) == N0.getValueType())
7360         return SDValue();
7361 
7362       // We know that the # elements of the results is the same as the #
7363       // elements of the compare (and the # elements of the compare result for
7364       // that matter). Check to see that they are the same size. If so, we know
7365       // that the element size of the sext'd result matches the element size of
7366       // the compare operands.
7367       SDLoc DL(N);
7368       SDValue VecOnes = DAG.getConstant(1, DL, VT);
7369       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
7370         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
7371         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
7372                                      N0.getOperand(1), N0.getOperand(2));
7373         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
7374       }
7375 
7376       // If the desired elements are smaller or larger than the source
7377       // elements we can use a matching integer vector type and then
7378       // truncate/sign extend.
7379       EVT MatchingElementType = EVT::getIntegerVT(
7380           *DAG.getContext(), N00VT.getScalarSizeInBits());
7381       EVT MatchingVectorType = EVT::getVectorVT(
7382           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
7383       SDValue VsetCC =
7384           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
7385                       N0.getOperand(1), N0.getOperand(2));
7386       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
7387                          VecOnes);
7388     }
7389 
7390     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7391     SDLoc DL(N);
7392     if (SDValue SCC = SimplifySelectCC(
7393             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7394             DAG.getConstant(0, DL, VT),
7395             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7396       return SCC;
7397   }
7398 
7399   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
7400   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
7401       isa<ConstantSDNode>(N0.getOperand(1)) &&
7402       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
7403       N0.hasOneUse()) {
7404     SDValue ShAmt = N0.getOperand(1);
7405     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
7406     if (N0.getOpcode() == ISD::SHL) {
7407       SDValue InnerZExt = N0.getOperand(0);
7408       // If the original shl may be shifting out bits, do not perform this
7409       // transformation.
7410       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
7411         InnerZExt.getOperand(0).getValueSizeInBits();
7412       if (ShAmtVal > KnownZeroBits)
7413         return SDValue();
7414     }
7415 
7416     SDLoc DL(N);
7417 
7418     // Ensure that the shift amount is wide enough for the shifted value.
7419     if (VT.getSizeInBits() >= 256)
7420       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
7421 
7422     return DAG.getNode(N0.getOpcode(), DL, VT,
7423                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
7424                        ShAmt);
7425   }
7426 
7427   return SDValue();
7428 }
7429 
7430 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
7431   SDValue N0 = N->getOperand(0);
7432   EVT VT = N->getValueType(0);
7433 
7434   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7435                                               LegalOperations))
7436     return SDValue(Res, 0);
7437 
7438   // fold (aext (aext x)) -> (aext x)
7439   // fold (aext (zext x)) -> (zext x)
7440   // fold (aext (sext x)) -> (sext x)
7441   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
7442       N0.getOpcode() == ISD::ZERO_EXTEND ||
7443       N0.getOpcode() == ISD::SIGN_EXTEND)
7444     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7445 
7446   // fold (aext (truncate (load x))) -> (aext (smaller load x))
7447   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
7448   if (N0.getOpcode() == ISD::TRUNCATE) {
7449     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
7450       SDNode *oye = N0.getOperand(0).getNode();
7451       if (NarrowLoad.getNode() != N0.getNode()) {
7452         CombineTo(N0.getNode(), NarrowLoad);
7453         // CombineTo deleted the truncate, if needed, but not what's under it.
7454         AddToWorklist(oye);
7455       }
7456       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7457     }
7458   }
7459 
7460   // fold (aext (truncate x))
7461   if (N0.getOpcode() == ISD::TRUNCATE) {
7462     SDValue TruncOp = N0.getOperand(0);
7463     if (TruncOp.getValueType() == VT)
7464       return TruncOp; // x iff x size == zext size.
7465     if (TruncOp.getValueType().bitsGT(VT))
7466       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
7467     return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
7468   }
7469 
7470   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
7471   // if the trunc is not free.
7472   if (N0.getOpcode() == ISD::AND &&
7473       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7474       N0.getOperand(1).getOpcode() == ISD::Constant &&
7475       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
7476                           N0.getValueType())) {
7477     SDLoc DL(N);
7478     SDValue X = N0.getOperand(0).getOperand(0);
7479     if (X.getValueType().bitsLT(VT)) {
7480       X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
7481     } else if (X.getValueType().bitsGT(VT)) {
7482       X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
7483     }
7484     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
7485     Mask = Mask.zext(VT.getSizeInBits());
7486     return DAG.getNode(ISD::AND, DL, VT,
7487                        X, DAG.getConstant(Mask, DL, VT));
7488   }
7489 
7490   // fold (aext (load x)) -> (aext (truncate (extload x)))
7491   // None of the supported targets knows how to perform load and any_ext
7492   // on vectors in one instruction.  We only perform this transformation on
7493   // scalars.
7494   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
7495       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7496       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
7497     bool DoXform = true;
7498     SmallVector<SDNode*, 4> SetCCs;
7499     if (!N0.hasOneUse())
7500       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
7501     if (DoXform) {
7502       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7503       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
7504                                        LN0->getChain(),
7505                                        LN0->getBasePtr(), N0.getValueType(),
7506                                        LN0->getMemOperand());
7507       CombineTo(N, ExtLoad);
7508       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7509                                   N0.getValueType(), ExtLoad);
7510       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
7511       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
7512                       ISD::ANY_EXTEND);
7513       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7514     }
7515   }
7516 
7517   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
7518   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
7519   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
7520   if (N0.getOpcode() == ISD::LOAD &&
7521       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7522       N0.hasOneUse()) {
7523     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7524     ISD::LoadExtType ExtType = LN0->getExtensionType();
7525     EVT MemVT = LN0->getMemoryVT();
7526     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
7527       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
7528                                        VT, LN0->getChain(), LN0->getBasePtr(),
7529                                        MemVT, LN0->getMemOperand());
7530       CombineTo(N, ExtLoad);
7531       CombineTo(N0.getNode(),
7532                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
7533                             N0.getValueType(), ExtLoad),
7534                 ExtLoad.getValue(1));
7535       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7536     }
7537   }
7538 
7539   if (N0.getOpcode() == ISD::SETCC) {
7540     // For vectors:
7541     // aext(setcc) -> vsetcc
7542     // aext(setcc) -> truncate(vsetcc)
7543     // aext(setcc) -> aext(vsetcc)
7544     // Only do this before legalize for now.
7545     if (VT.isVector() && !LegalOperations) {
7546       EVT N0VT = N0.getOperand(0).getValueType();
7547         // We know that the # elements of the results is the same as the
7548         // # elements of the compare (and the # elements of the compare result
7549         // for that matter).  Check to see that they are the same size.  If so,
7550         // we know that the element size of the sext'd result matches the
7551         // element size of the compare operands.
7552       if (VT.getSizeInBits() == N0VT.getSizeInBits())
7553         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
7554                              N0.getOperand(1),
7555                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
7556       // If the desired elements are smaller or larger than the source
7557       // elements we can use a matching integer vector type and then
7558       // truncate/any extend
7559       else {
7560         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
7561         SDValue VsetCC =
7562           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
7563                         N0.getOperand(1),
7564                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
7565         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
7566       }
7567     }
7568 
7569     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
7570     SDLoc DL(N);
7571     if (SDValue SCC = SimplifySelectCC(
7572             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
7573             DAG.getConstant(0, DL, VT),
7574             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
7575       return SCC;
7576   }
7577 
7578   return SDValue();
7579 }
7580 
7581 SDValue DAGCombiner::visitAssertZext(SDNode *N) {
7582   SDValue N0 = N->getOperand(0);
7583   SDValue N1 = N->getOperand(1);
7584   EVT EVT = cast<VTSDNode>(N1)->getVT();
7585 
7586   // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
7587   if (N0.getOpcode() == ISD::AssertZext &&
7588       EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
7589     return N0;
7590 
7591   return SDValue();
7592 }
7593 
7594 /// See if the specified operand can be simplified with the knowledge that only
7595 /// the bits specified by Mask are used.  If so, return the simpler operand,
7596 /// otherwise return a null SDValue.
7597 ///
7598 /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
7599 /// simplify nodes with multiple uses more aggressively.)
7600 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
7601   switch (V.getOpcode()) {
7602   default: break;
7603   case ISD::Constant: {
7604     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
7605     assert(CV && "Const value should be ConstSDNode.");
7606     const APInt &CVal = CV->getAPIntValue();
7607     APInt NewVal = CVal & Mask;
7608     if (NewVal != CVal)
7609       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
7610     break;
7611   }
7612   case ISD::OR:
7613   case ISD::XOR:
7614     // If the LHS or RHS don't contribute bits to the or, drop them.
7615     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
7616       return V.getOperand(1);
7617     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
7618       return V.getOperand(0);
7619     break;
7620   case ISD::SRL:
7621     // Only look at single-use SRLs.
7622     if (!V.getNode()->hasOneUse())
7623       break;
7624     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
7625       // See if we can recursively simplify the LHS.
7626       unsigned Amt = RHSC->getZExtValue();
7627 
7628       // Watch out for shift count overflow though.
7629       if (Amt >= Mask.getBitWidth()) break;
7630       APInt NewMask = Mask << Amt;
7631       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
7632         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
7633                            SimplifyLHS, V.getOperand(1));
7634     }
7635     break;
7636   case ISD::AND: {
7637     // X & -1 -> X (ignoring bits which aren't demanded).
7638     ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
7639     if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
7640       return V.getOperand(0);
7641     break;
7642   }
7643   }
7644   return SDValue();
7645 }
7646 
7647 /// If the result of a wider load is shifted to right of N  bits and then
7648 /// truncated to a narrower type and where N is a multiple of number of bits of
7649 /// the narrower type, transform it to a narrower load from address + N / num of
7650 /// bits of new type. If the result is to be extended, also fold the extension
7651 /// to form a extending load.
7652 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7653   unsigned Opc = N->getOpcode();
7654 
7655   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7656   SDValue N0 = N->getOperand(0);
7657   EVT VT = N->getValueType(0);
7658   EVT ExtVT = VT;
7659 
7660   // This transformation isn't valid for vector loads.
7661   if (VT.isVector())
7662     return SDValue();
7663 
7664   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
7665   // extended to VT.
7666   if (Opc == ISD::SIGN_EXTEND_INREG) {
7667     ExtType = ISD::SEXTLOAD;
7668     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7669   } else if (Opc == ISD::SRL) {
7670     // Another special-case: SRL is basically zero-extending a narrower value.
7671     ExtType = ISD::ZEXTLOAD;
7672     N0 = SDValue(N, 0);
7673     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7674     if (!N01) return SDValue();
7675     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
7676                               VT.getSizeInBits() - N01->getZExtValue());
7677   }
7678   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
7679     return SDValue();
7680 
7681   unsigned EVTBits = ExtVT.getSizeInBits();
7682 
7683   // Do not generate loads of non-round integer types since these can
7684   // be expensive (and would be wrong if the type is not byte sized).
7685   if (!ExtVT.isRound())
7686     return SDValue();
7687 
7688   unsigned ShAmt = 0;
7689   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
7690     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7691       ShAmt = N01->getZExtValue();
7692       // Is the shift amount a multiple of size of VT?
7693       if ((ShAmt & (EVTBits-1)) == 0) {
7694         N0 = N0.getOperand(0);
7695         // Is the load width a multiple of size of VT?
7696         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
7697           return SDValue();
7698       }
7699 
7700       // At this point, we must have a load or else we can't do the transform.
7701       if (!isa<LoadSDNode>(N0)) return SDValue();
7702 
7703       // Because a SRL must be assumed to *need* to zero-extend the high bits
7704       // (as opposed to anyext the high bits), we can't combine the zextload
7705       // lowering of SRL and an sextload.
7706       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
7707         return SDValue();
7708 
7709       // If the shift amount is larger than the input type then we're not
7710       // accessing any of the loaded bytes.  If the load was a zextload/extload
7711       // then the result of the shift+trunc is zero/undef (handled elsewhere).
7712       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
7713         return SDValue();
7714     }
7715   }
7716 
7717   // If the load is shifted left (and the result isn't shifted back right),
7718   // we can fold the truncate through the shift.
7719   unsigned ShLeftAmt = 0;
7720   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7721       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
7722     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7723       ShLeftAmt = N01->getZExtValue();
7724       N0 = N0.getOperand(0);
7725     }
7726   }
7727 
7728   // If we haven't found a load, we can't narrow it.  Don't transform one with
7729   // multiple uses, this would require adding a new load.
7730   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
7731     return SDValue();
7732 
7733   // Don't change the width of a volatile load.
7734   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7735   if (LN0->isVolatile())
7736     return SDValue();
7737 
7738   // Verify that we are actually reducing a load width here.
7739   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
7740     return SDValue();
7741 
7742   // For the transform to be legal, the load must produce only two values
7743   // (the value loaded and the chain).  Don't transform a pre-increment
7744   // load, for example, which produces an extra value.  Otherwise the
7745   // transformation is not equivalent, and the downstream logic to replace
7746   // uses gets things wrong.
7747   if (LN0->getNumValues() > 2)
7748     return SDValue();
7749 
7750   // If the load that we're shrinking is an extload and we're not just
7751   // discarding the extension we can't simply shrink the load. Bail.
7752   // TODO: It would be possible to merge the extensions in some cases.
7753   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
7754       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
7755     return SDValue();
7756 
7757   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
7758     return SDValue();
7759 
7760   EVT PtrType = N0.getOperand(1).getValueType();
7761 
7762   if (PtrType == MVT::Untyped || PtrType.isExtended())
7763     // It's not possible to generate a constant of extended or untyped type.
7764     return SDValue();
7765 
7766   // For big endian targets, we need to adjust the offset to the pointer to
7767   // load the correct bytes.
7768   if (DAG.getDataLayout().isBigEndian()) {
7769     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
7770     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
7771     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
7772   }
7773 
7774   uint64_t PtrOff = ShAmt / 8;
7775   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
7776   SDLoc DL(LN0);
7777   // The original load itself didn't wrap, so an offset within it doesn't.
7778   SDNodeFlags Flags;
7779   Flags.setNoUnsignedWrap(true);
7780   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
7781                                PtrType, LN0->getBasePtr(),
7782                                DAG.getConstant(PtrOff, DL, PtrType),
7783                                &Flags);
7784   AddToWorklist(NewPtr.getNode());
7785 
7786   SDValue Load;
7787   if (ExtType == ISD::NON_EXTLOAD)
7788     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
7789                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
7790                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7791   else
7792     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
7793                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
7794                           NewAlign, LN0->getMemOperand()->getFlags(),
7795                           LN0->getAAInfo());
7796 
7797   // Replace the old load's chain with the new load's chain.
7798   WorklistRemover DeadNodes(*this);
7799   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7800 
7801   // Shift the result left, if we've swallowed a left shift.
7802   SDValue Result = Load;
7803   if (ShLeftAmt != 0) {
7804     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
7805     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
7806       ShImmTy = VT;
7807     // If the shift amount is as large as the result size (but, presumably,
7808     // no larger than the source) then the useful bits of the result are
7809     // zero; we can't simply return the shortened shift, because the result
7810     // of that operation is undefined.
7811     SDLoc DL(N0);
7812     if (ShLeftAmt >= VT.getSizeInBits())
7813       Result = DAG.getConstant(0, DL, VT);
7814     else
7815       Result = DAG.getNode(ISD::SHL, DL, VT,
7816                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
7817   }
7818 
7819   // Return the new loaded value.
7820   return Result;
7821 }
7822 
7823 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
7824   SDValue N0 = N->getOperand(0);
7825   SDValue N1 = N->getOperand(1);
7826   EVT VT = N->getValueType(0);
7827   EVT EVT = cast<VTSDNode>(N1)->getVT();
7828   unsigned VTBits = VT.getScalarSizeInBits();
7829   unsigned EVTBits = EVT.getScalarSizeInBits();
7830 
7831   if (N0.isUndef())
7832     return DAG.getUNDEF(VT);
7833 
7834   // fold (sext_in_reg c1) -> c1
7835   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7836     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
7837 
7838   // If the input is already sign extended, just drop the extension.
7839   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
7840     return N0;
7841 
7842   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
7843   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
7844       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
7845     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7846                        N0.getOperand(0), N1);
7847 
7848   // fold (sext_in_reg (sext x)) -> (sext x)
7849   // fold (sext_in_reg (aext x)) -> (sext x)
7850   // if x is small enough.
7851   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
7852     SDValue N00 = N0.getOperand(0);
7853     if (N00.getScalarValueSizeInBits() <= EVTBits &&
7854         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7855       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7856   }
7857 
7858   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
7859   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
7860        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
7861        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
7862       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
7863     if (!LegalOperations ||
7864         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
7865       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
7866   }
7867 
7868   // fold (sext_in_reg (zext x)) -> (sext x)
7869   // iff we are extending the source sign bit.
7870   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
7871     SDValue N00 = N0.getOperand(0);
7872     if (N00.getScalarValueSizeInBits() == EVTBits &&
7873         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7874       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7875   }
7876 
7877   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
7878   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
7879     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
7880 
7881   // fold operands of sext_in_reg based on knowledge that the top bits are not
7882   // demanded.
7883   if (SimplifyDemandedBits(SDValue(N, 0)))
7884     return SDValue(N, 0);
7885 
7886   // fold (sext_in_reg (load x)) -> (smaller sextload x)
7887   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
7888   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7889     return NarrowLoad;
7890 
7891   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
7892   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
7893   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
7894   if (N0.getOpcode() == ISD::SRL) {
7895     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
7896       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
7897         // We can turn this into an SRA iff the input to the SRL is already sign
7898         // extended enough.
7899         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
7900         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
7901           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
7902                              N0.getOperand(0), N0.getOperand(1));
7903       }
7904   }
7905 
7906   // fold (sext_inreg (extload x)) -> (sextload x)
7907   if (ISD::isEXTLoad(N0.getNode()) &&
7908       ISD::isUNINDEXEDLoad(N0.getNode()) &&
7909       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7910       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7911        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7912     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7913     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7914                                      LN0->getChain(),
7915                                      LN0->getBasePtr(), EVT,
7916                                      LN0->getMemOperand());
7917     CombineTo(N, ExtLoad);
7918     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7919     AddToWorklist(ExtLoad.getNode());
7920     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7921   }
7922   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
7923   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7924       N0.hasOneUse() &&
7925       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7926       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7927        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7928     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7929     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7930                                      LN0->getChain(),
7931                                      LN0->getBasePtr(), EVT,
7932                                      LN0->getMemOperand());
7933     CombineTo(N, ExtLoad);
7934     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7935     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
7936   }
7937 
7938   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
7939   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
7940     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7941                                            N0.getOperand(1), false))
7942       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7943                          BSwap, N1);
7944   }
7945 
7946   return SDValue();
7947 }
7948 
7949 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
7950   SDValue N0 = N->getOperand(0);
7951   EVT VT = N->getValueType(0);
7952 
7953   if (N0.isUndef())
7954     return DAG.getUNDEF(VT);
7955 
7956   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7957                                               LegalOperations))
7958     return SDValue(Res, 0);
7959 
7960   return SDValue();
7961 }
7962 
7963 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
7964   SDValue N0 = N->getOperand(0);
7965   EVT VT = N->getValueType(0);
7966 
7967   if (N0.isUndef())
7968     return DAG.getUNDEF(VT);
7969 
7970   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7971                                               LegalOperations))
7972     return SDValue(Res, 0);
7973 
7974   return SDValue();
7975 }
7976 
7977 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
7978   SDValue N0 = N->getOperand(0);
7979   EVT VT = N->getValueType(0);
7980   bool isLE = DAG.getDataLayout().isLittleEndian();
7981 
7982   // noop truncate
7983   if (N0.getValueType() == N->getValueType(0))
7984     return N0;
7985   // fold (truncate c1) -> c1
7986   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7987     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
7988   // fold (truncate (truncate x)) -> (truncate x)
7989   if (N0.getOpcode() == ISD::TRUNCATE)
7990     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7991   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
7992   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
7993       N0.getOpcode() == ISD::SIGN_EXTEND ||
7994       N0.getOpcode() == ISD::ANY_EXTEND) {
7995     // if the source is smaller than the dest, we still need an extend.
7996     if (N0.getOperand(0).getValueType().bitsLT(VT))
7997       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7998     // if the source is larger than the dest, than we just need the truncate.
7999     if (N0.getOperand(0).getValueType().bitsGT(VT))
8000       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
8001     // if the source and dest are the same type, we can drop both the extend
8002     // and the truncate.
8003     return N0.getOperand(0);
8004   }
8005 
8006   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
8007   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
8008     return SDValue();
8009 
8010   // Fold extract-and-trunc into a narrow extract. For example:
8011   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
8012   //   i32 y = TRUNCATE(i64 x)
8013   //        -- becomes --
8014   //   v16i8 b = BITCAST (v2i64 val)
8015   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
8016   //
8017   // Note: We only run this optimization after type legalization (which often
8018   // creates this pattern) and before operation legalization after which
8019   // we need to be more careful about the vector instructions that we generate.
8020   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8021       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
8022 
8023     EVT VecTy = N0.getOperand(0).getValueType();
8024     EVT ExTy = N0.getValueType();
8025     EVT TrTy = N->getValueType(0);
8026 
8027     unsigned NumElem = VecTy.getVectorNumElements();
8028     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
8029 
8030     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
8031     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
8032 
8033     SDValue EltNo = N0->getOperand(1);
8034     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
8035       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
8036       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
8037       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
8038 
8039       SDLoc DL(N);
8040       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
8041                          DAG.getBitcast(NVT, N0.getOperand(0)),
8042                          DAG.getConstant(Index, DL, IndexTy));
8043     }
8044   }
8045 
8046   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
8047   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
8048     EVT SrcVT = N0.getValueType();
8049     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
8050         TLI.isTruncateFree(SrcVT, VT)) {
8051       SDLoc SL(N0);
8052       SDValue Cond = N0.getOperand(0);
8053       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8054       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
8055       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
8056     }
8057   }
8058 
8059   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
8060   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
8061       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
8062       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
8063     if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
8064       uint64_t Amt = CAmt->getZExtValue();
8065       unsigned Size = VT.getScalarSizeInBits();
8066 
8067       if (Amt < Size) {
8068         SDLoc SL(N);
8069         EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
8070 
8071         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8072         return DAG.getNode(ISD::SHL, SL, VT, Trunc,
8073                            DAG.getConstant(Amt, SL, AmtVT));
8074       }
8075     }
8076   }
8077 
8078   // Fold a series of buildvector, bitcast, and truncate if possible.
8079   // For example fold
8080   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
8081   //   (2xi32 (buildvector x, y)).
8082   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
8083       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
8084       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
8085       N0.getOperand(0).hasOneUse()) {
8086 
8087     SDValue BuildVect = N0.getOperand(0);
8088     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
8089     EVT TruncVecEltTy = VT.getVectorElementType();
8090 
8091     // Check that the element types match.
8092     if (BuildVectEltTy == TruncVecEltTy) {
8093       // Now we only need to compute the offset of the truncated elements.
8094       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
8095       unsigned TruncVecNumElts = VT.getVectorNumElements();
8096       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
8097 
8098       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
8099              "Invalid number of elements");
8100 
8101       SmallVector<SDValue, 8> Opnds;
8102       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
8103         Opnds.push_back(BuildVect.getOperand(i));
8104 
8105       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
8106     }
8107   }
8108 
8109   // See if we can simplify the input to this truncate through knowledge that
8110   // only the low bits are being used.
8111   // For example "trunc (or (shl x, 8), y)" // -> trunc y
8112   // Currently we only perform this optimization on scalars because vectors
8113   // may have different active low bits.
8114   if (!VT.isVector()) {
8115     if (SDValue Shorter =
8116             GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
8117                                                      VT.getSizeInBits())))
8118       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
8119   }
8120 
8121   // fold (truncate (load x)) -> (smaller load x)
8122   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
8123   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
8124     if (SDValue Reduced = ReduceLoadWidth(N))
8125       return Reduced;
8126 
8127     // Handle the case where the load remains an extending load even
8128     // after truncation.
8129     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
8130       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8131       if (!LN0->isVolatile() &&
8132           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
8133         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
8134                                          VT, LN0->getChain(), LN0->getBasePtr(),
8135                                          LN0->getMemoryVT(),
8136                                          LN0->getMemOperand());
8137         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
8138         return NewLoad;
8139       }
8140     }
8141   }
8142 
8143   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
8144   // where ... are all 'undef'.
8145   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
8146     SmallVector<EVT, 8> VTs;
8147     SDValue V;
8148     unsigned Idx = 0;
8149     unsigned NumDefs = 0;
8150 
8151     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
8152       SDValue X = N0.getOperand(i);
8153       if (!X.isUndef()) {
8154         V = X;
8155         Idx = i;
8156         NumDefs++;
8157       }
8158       // Stop if more than one members are non-undef.
8159       if (NumDefs > 1)
8160         break;
8161       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
8162                                      VT.getVectorElementType(),
8163                                      X.getValueType().getVectorNumElements()));
8164     }
8165 
8166     if (NumDefs == 0)
8167       return DAG.getUNDEF(VT);
8168 
8169     if (NumDefs == 1) {
8170       assert(V.getNode() && "The single defined operand is empty!");
8171       SmallVector<SDValue, 8> Opnds;
8172       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
8173         if (i != Idx) {
8174           Opnds.push_back(DAG.getUNDEF(VTs[i]));
8175           continue;
8176         }
8177         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
8178         AddToWorklist(NV.getNode());
8179         Opnds.push_back(NV);
8180       }
8181       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
8182     }
8183   }
8184 
8185   // Fold truncate of a bitcast of a vector to an extract of the low vector
8186   // element.
8187   //
8188   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
8189   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
8190     SDValue VecSrc = N0.getOperand(0);
8191     EVT SrcVT = VecSrc.getValueType();
8192     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
8193         (!LegalOperations ||
8194          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
8195       SDLoc SL(N);
8196 
8197       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
8198       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
8199                          VecSrc, DAG.getConstant(0, SL, IdxVT));
8200     }
8201   }
8202 
8203   // Simplify the operands using demanded-bits information.
8204   if (!VT.isVector() &&
8205       SimplifyDemandedBits(SDValue(N, 0)))
8206     return SDValue(N, 0);
8207 
8208   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
8209   // When the adde's carry is not used.
8210   if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() &&
8211       !N0.getNode()->hasAnyUseOfValue(1) &&
8212       (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) {
8213     SDLoc SL(N);
8214     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
8215     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
8216     return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue),
8217                        X, Y, N0.getOperand(2));
8218   }
8219 
8220   return SDValue();
8221 }
8222 
8223 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8224   SDValue Elt = N->getOperand(i);
8225   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8226     return Elt.getNode();
8227   return Elt.getOperand(Elt.getResNo()).getNode();
8228 }
8229 
8230 /// build_pair (load, load) -> load
8231 /// if load locations are consecutive.
8232 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8233   assert(N->getOpcode() == ISD::BUILD_PAIR);
8234 
8235   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8236   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8237   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8238       LD1->getAddressSpace() != LD2->getAddressSpace())
8239     return SDValue();
8240   EVT LD1VT = LD1->getValueType(0);
8241   unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
8242   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8243       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8244     unsigned Align = LD1->getAlignment();
8245     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8246         VT.getTypeForEVT(*DAG.getContext()));
8247 
8248     if (NewAlign <= Align &&
8249         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8250       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8251                          LD1->getPointerInfo(), Align);
8252   }
8253 
8254   return SDValue();
8255 }
8256 
8257 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8258   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8259   // and Lo parts; on big-endian machines it doesn't.
8260   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8261 }
8262 
8263 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8264                                     const TargetLowering &TLI) {
8265   // If this is not a bitcast to an FP type or if the target doesn't have
8266   // IEEE754-compliant FP logic, we're done.
8267   EVT VT = N->getValueType(0);
8268   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8269     return SDValue();
8270 
8271   // TODO: Use splat values for the constant-checking below and remove this
8272   // restriction.
8273   SDValue N0 = N->getOperand(0);
8274   EVT SourceVT = N0.getValueType();
8275   if (SourceVT.isVector())
8276     return SDValue();
8277 
8278   unsigned FPOpcode;
8279   APInt SignMask;
8280   switch (N0.getOpcode()) {
8281   case ISD::AND:
8282     FPOpcode = ISD::FABS;
8283     SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
8284     break;
8285   case ISD::XOR:
8286     FPOpcode = ISD::FNEG;
8287     SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
8288     break;
8289   // TODO: ISD::OR --> ISD::FNABS?
8290   default:
8291     return SDValue();
8292   }
8293 
8294   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8295   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8296   SDValue LogicOp0 = N0.getOperand(0);
8297   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8298   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8299       LogicOp0.getOpcode() == ISD::BITCAST &&
8300       LogicOp0->getOperand(0).getValueType() == VT)
8301     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8302 
8303   return SDValue();
8304 }
8305 
/// Combine a BITCAST node \p N. The folds are attempted in this order, and the
/// first one that applies wins:
///   - bitcast of undef -> undef
///   - bitcast of a constant BUILD_VECTOR (before type legalization)
///   - bitcast of a scalar integer/FP constant
///   - bitcast of a bitcast
///   - bitcast of a normal, non-volatile, single-use load -> load of VT
///   - integer sign-bit logic on a bitcast FP value -> fabs/fneg
///   - bitcast of fneg/fabs -> integer xor/and on the sign bit
///   - bitcast of (fcopysign cst, x) -> integer logic
///   - bitcast of a BUILD_PAIR of consecutive loads -> one load
///   - bitcast of a shuffle of bitcasts -> shuffle in the destination type
/// Returns the replacement value, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // A bitcast of undef is undef of the destination type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> fp conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getBitcast(VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only rewrite when the target reports the VT access at the original
    // alignment as both allowed and fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect users of the old load's result #1 (presumably its output
      // chain) to the corresponding result of the new load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // Integer and/xor on the sign bit of a bitcast FP value -> fabs/fneg.
  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppcf128 is handled as a pair of i64 halves; the same flip bit is applied
    // to both halves via a BUILD_PAIR. Only done before type legalization.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        // fneg: unconditionally flip the sign bit of each half.
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // fabs: flip only if the Hi half's sign bit is currently set.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    // Generic scalar case: operate directly on the sign bit of VT.
    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    // The sign operand may have a different width than the result; work on an
    // integer of the sign operand's own width first.
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        // NOTE: this X deliberately shadows the outer X; the ppcf128 path
        // works on a direct bitcast of the sign operand to VT.
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        // The xor's Hi sign bit is set exactly when cst and x disagree in
        // sign, i.e. when cst's sign must be flipped.
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: keep only the sign bit of X ...
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      // ... keep everything but the sign bit of the constant ...
      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      // ... and merge the two.
      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  // Requires the destination to have a whole multiple of the shuffle's element
  // count so that each mask entry can be scaled.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    // Any other operand yields an empty SDValue, which aborts the fold below.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // Expand each source mask entry M into MaskScale consecutive entries
    // addressing the narrower destination elements; undef (-1) stays undef.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the scaled mask is not legal, retry with the operands commuted.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
8553 
8554 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
8555   EVT VT = N->getValueType(0);
8556   return CombineConsecutiveLoads(N, VT);
8557 }
8558 
8559 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
8560 /// operands. DstEltVT indicates the destination element value type.
8561 SDValue DAGCombiner::
8562 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
8563   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
8564 
8565   // If this is already the right type, we're done.
8566   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
8567 
8568   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
8569   unsigned DstBitSize = DstEltVT.getSizeInBits();
8570 
8571   // If this is a conversion of N elements of one type to N elements of another
8572   // type, convert each element.  This handles FP<->INT cases.
8573   if (SrcBitSize == DstBitSize) {
8574     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8575                               BV->getValueType(0).getVectorNumElements());
8576 
8577     // Due to the FP element handling below calling this routine recursively,
8578     // we can end up with a scalar-to-vector node here.
8579     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
8580       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
8581                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
8582 
8583     SmallVector<SDValue, 8> Ops;
8584     for (SDValue Op : BV->op_values()) {
8585       // If the vector element type is not legal, the BUILD_VECTOR operands
8586       // are promoted and implicitly truncated.  Make that explicit here.
8587       if (Op.getValueType() != SrcEltVT)
8588         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
8589       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
8590       AddToWorklist(Ops.back().getNode());
8591     }
8592     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
8593   }
8594 
8595   // Otherwise, we're growing or shrinking the elements.  To avoid having to
8596   // handle annoying details of growing/shrinking FP values, we convert them to
8597   // int first.
8598   if (SrcEltVT.isFloatingPoint()) {
8599     // Convert the input float vector to a int vector where the elements are the
8600     // same sizes.
8601     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
8602     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
8603     SrcEltVT = IntVT;
8604   }
8605 
8606   // Now we know the input is an integer vector.  If the output is a FP type,
8607   // convert to integer first, then to FP of the right size.
8608   if (DstEltVT.isFloatingPoint()) {
8609     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
8610     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
8611 
8612     // Next, convert to FP elements of the same size.
8613     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
8614   }
8615 
8616   SDLoc DL(BV);
8617 
8618   // Okay, we know the src/dst types are both integers of differing types.
8619   // Handling growing first.
8620   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
8621   if (SrcBitSize < DstBitSize) {
8622     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
8623 
8624     SmallVector<SDValue, 8> Ops;
8625     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
8626          i += NumInputsPerOutput) {
8627       bool isLE = DAG.getDataLayout().isLittleEndian();
8628       APInt NewBits = APInt(DstBitSize, 0);
8629       bool EltIsUndef = true;
8630       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
8631         // Shift the previously computed bits over.
8632         NewBits <<= SrcBitSize;
8633         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
8634         if (Op.isUndef()) continue;
8635         EltIsUndef = false;
8636 
8637         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
8638                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
8639       }
8640 
8641       if (EltIsUndef)
8642         Ops.push_back(DAG.getUNDEF(DstEltVT));
8643       else
8644         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
8645     }
8646 
8647     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
8648     return DAG.getBuildVector(VT, DL, Ops);
8649   }
8650 
8651   // Finally, this must be the case where we are shrinking elements: each input
8652   // turns into multiple outputs.
8653   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
8654   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8655                             NumOutputsPerInput*BV->getNumOperands());
8656   SmallVector<SDValue, 8> Ops;
8657 
8658   for (const SDValue &Op : BV->op_values()) {
8659     if (Op.isUndef()) {
8660       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
8661       continue;
8662     }
8663 
8664     APInt OpVal = cast<ConstantSDNode>(Op)->
8665                   getAPIntValue().zextOrTrunc(SrcBitSize);
8666 
8667     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
8668       APInt ThisVal = OpVal.trunc(DstBitSize);
8669       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
8670       OpVal = OpVal.lshr(DstBitSize);
8671     }
8672 
8673     // For big endian targets, swap the order of the pieces of each element.
8674     if (DAG.getDataLayout().isBigEndian())
8675       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
8676   }
8677 
8678   return DAG.getBuildVector(VT, DL, Ops);
8679 }
8680 
8681 static bool isContractable(SDNode *N) {
8682   SDNodeFlags F = cast<BinaryWithFlagsSDNode>(N)->Flags;
8683   return F.hasAllowContract() || F.hasUnsafeAlgebra();
8684 }
8685 
8686 /// Try to perform FMA combining on a given FADD node.
8687 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
8688   SDValue N0 = N->getOperand(0);
8689   SDValue N1 = N->getOperand(1);
8690   EVT VT = N->getValueType(0);
8691   SDLoc SL(N);
8692 
8693   const TargetOptions &Options = DAG.getTarget().Options;
8694 
8695   // Floating-point multiply-add with intermediate rounding.
8696   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8697 
8698   // Floating-point multiply-add without intermediate rounding.
8699   bool HasFMA =
8700       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8701       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8702 
8703   // No valid opcode, do not combine.
8704   if (!HasFMAD && !HasFMA)
8705     return SDValue();
8706 
8707   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
8708                               Options.UnsafeFPMath || HasFMAD);
8709   // If the addition is not contractable, do not combine.
8710   if (!AllowFusionGlobally && !isContractable(N))
8711     return SDValue();
8712 
8713   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8714   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
8715     return SDValue();
8716 
8717   // Always prefer FMAD to FMA for precision.
8718   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8719   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8720   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8721 
8722   // Is the node an FMUL and contractable either due to global flags or
8723   // SDNodeFlags.
8724   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
8725     if (N.getOpcode() != ISD::FMUL)
8726       return false;
8727     return AllowFusionGlobally || isContractable(N.getNode());
8728   };
8729   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
8730   // prefer to fold the multiply with fewer uses.
8731   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
8732     if (N0.getNode()->use_size() > N1.getNode()->use_size())
8733       std::swap(N0, N1);
8734   }
8735 
8736   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
8737   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
8738     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8739                        N0.getOperand(0), N0.getOperand(1), N1);
8740   }
8741 
8742   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
8743   // Note: Commutes FADD operands.
8744   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
8745     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8746                        N1.getOperand(0), N1.getOperand(1), N0);
8747   }
8748 
8749   // Look through FP_EXTEND nodes to do more combining.
8750   if (LookThroughFPExt) {
8751     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
8752     if (N0.getOpcode() == ISD::FP_EXTEND) {
8753       SDValue N00 = N0.getOperand(0);
8754       if (isContractableFMUL(N00))
8755         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8756                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8757                                        N00.getOperand(0)),
8758                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8759                                        N00.getOperand(1)), N1);
8760     }
8761 
8762     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
8763     // Note: Commutes FADD operands.
8764     if (N1.getOpcode() == ISD::FP_EXTEND) {
8765       SDValue N10 = N1.getOperand(0);
8766       if (isContractableFMUL(N10))
8767         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8768                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8769                                        N10.getOperand(0)),
8770                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8771                                        N10.getOperand(1)), N0);
8772     }
8773   }
8774 
8775   // More folding opportunities when target permits.
8776   if (Aggressive) {
8777     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
8778     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8779     // are currently only supported on binary nodes.
8780     if (Options.UnsafeFPMath &&
8781         N0.getOpcode() == PreferredFusedOpcode &&
8782         N0.getOperand(2).getOpcode() == ISD::FMUL &&
8783         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
8784       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8785                          N0.getOperand(0), N0.getOperand(1),
8786                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8787                                      N0.getOperand(2).getOperand(0),
8788                                      N0.getOperand(2).getOperand(1),
8789                                      N1));
8790     }
8791 
8792     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
8793     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8794     // are currently only supported on binary nodes.
8795     if (Options.UnsafeFPMath &&
8796         N1->getOpcode() == PreferredFusedOpcode &&
8797         N1.getOperand(2).getOpcode() == ISD::FMUL &&
8798         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
8799       return DAG.getNode(PreferredFusedOpcode, SL, VT,
8800                          N1.getOperand(0), N1.getOperand(1),
8801                          DAG.getNode(PreferredFusedOpcode, SL, VT,
8802                                      N1.getOperand(2).getOperand(0),
8803                                      N1.getOperand(2).getOperand(1),
8804                                      N0));
8805     }
8806 
8807     if (/*AllowFusion &&*/ LookThroughFPExt) {
8808       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
8809       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
8810       auto FoldFAddFMAFPExtFMul = [&] (
8811           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8812         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
8813                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8814                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8815                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8816                                        Z));
8817       };
8818       if (N0.getOpcode() == PreferredFusedOpcode) {
8819         SDValue N02 = N0.getOperand(2);
8820         if (N02.getOpcode() == ISD::FP_EXTEND) {
8821           SDValue N020 = N02.getOperand(0);
8822           if (isContractableFMUL(N020))
8823             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
8824                                         N020.getOperand(0), N020.getOperand(1),
8825                                         N1);
8826         }
8827       }
8828 
8829       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
8830       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
8831       // FIXME: This turns two single-precision and one double-precision
8832       // operation into two double-precision operations, which might not be
8833       // interesting for all targets, especially GPUs.
8834       auto FoldFAddFPExtFMAFMul = [&] (
8835           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8836         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8837                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
8838                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
8839                            DAG.getNode(PreferredFusedOpcode, SL, VT,
8840                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8841                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8842                                        Z));
8843       };
8844       if (N0.getOpcode() == ISD::FP_EXTEND) {
8845         SDValue N00 = N0.getOperand(0);
8846         if (N00.getOpcode() == PreferredFusedOpcode) {
8847           SDValue N002 = N00.getOperand(2);
8848           if (isContractableFMUL(N002))
8849             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
8850                                         N002.getOperand(0), N002.getOperand(1),
8851                                         N1);
8852         }
8853       }
8854 
8855       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
8856       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
8857       if (N1.getOpcode() == PreferredFusedOpcode) {
8858         SDValue N12 = N1.getOperand(2);
8859         if (N12.getOpcode() == ISD::FP_EXTEND) {
8860           SDValue N120 = N12.getOperand(0);
8861           if (isContractableFMUL(N120))
8862             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
8863                                         N120.getOperand(0), N120.getOperand(1),
8864                                         N0);
8865         }
8866       }
8867 
8868       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
8869       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
8870       // FIXME: This turns two single-precision and one double-precision
8871       // operation into two double-precision operations, which might not be
8872       // interesting for all targets, especially GPUs.
8873       if (N1.getOpcode() == ISD::FP_EXTEND) {
8874         SDValue N10 = N1.getOperand(0);
8875         if (N10.getOpcode() == PreferredFusedOpcode) {
8876           SDValue N102 = N10.getOperand(2);
8877           if (isContractableFMUL(N102))
8878             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
8879                                         N102.getOperand(0), N102.getOperand(1),
8880                                         N0);
8881         }
8882       }
8883     }
8884   }
8885 
8886   return SDValue();
8887 }
8888 
8889 /// Try to perform FMA combining on a given FSUB node.
8890 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
8891   SDValue N0 = N->getOperand(0);
8892   SDValue N1 = N->getOperand(1);
8893   EVT VT = N->getValueType(0);
8894   SDLoc SL(N);
8895 
8896   const TargetOptions &Options = DAG.getTarget().Options;
8897   bool AllowFusion =
8898       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8899 
8900   // Floating-point multiply-add with intermediate rounding.
8901   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8902 
8903   // Floating-point multiply-add without intermediate rounding.
8904   bool HasFMA =
8905       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8906       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8907 
8908   // No valid opcode, do not combine.
8909   if (!HasFMAD && !HasFMA)
8910     return SDValue();
8911 
8912   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8913   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
8914     return SDValue();
8915 
8916   // Always prefer FMAD to FMA for precision.
8917   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8918   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8919   bool LookThroughFPExt = TLI.isFPExtFree(VT);
8920 
8921   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
8922   if (N0.getOpcode() == ISD::FMUL &&
8923       (Aggressive || N0->hasOneUse())) {
8924     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8925                        N0.getOperand(0), N0.getOperand(1),
8926                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8927   }
8928 
8929   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
8930   // Note: Commutes FSUB operands.
8931   if (N1.getOpcode() == ISD::FMUL &&
8932       (Aggressive || N1->hasOneUse()))
8933     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8934                        DAG.getNode(ISD::FNEG, SL, VT,
8935                                    N1.getOperand(0)),
8936                        N1.getOperand(1), N0);
8937 
8938   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
8939   if (N0.getOpcode() == ISD::FNEG &&
8940       N0.getOperand(0).getOpcode() == ISD::FMUL &&
8941       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
8942     SDValue N00 = N0.getOperand(0).getOperand(0);
8943     SDValue N01 = N0.getOperand(0).getOperand(1);
8944     return DAG.getNode(PreferredFusedOpcode, SL, VT,
8945                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
8946                        DAG.getNode(ISD::FNEG, SL, VT, N1));
8947   }
8948 
8949   // Look through FP_EXTEND nodes to do more combining.
8950   if (AllowFusion && LookThroughFPExt) {
8951     // fold (fsub (fpext (fmul x, y)), z)
8952     //   -> (fma (fpext x), (fpext y), (fneg z))
8953     if (N0.getOpcode() == ISD::FP_EXTEND) {
8954       SDValue N00 = N0.getOperand(0);
8955       if (N00.getOpcode() == ISD::FMUL)
8956         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8957                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8958                                        N00.getOperand(0)),
8959                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8960                                        N00.getOperand(1)),
8961                            DAG.getNode(ISD::FNEG, SL, VT, N1));
8962     }
8963 
8964     // fold (fsub x, (fpext (fmul y, z)))
8965     //   -> (fma (fneg (fpext y)), (fpext z), x)
8966     // Note: Commutes FSUB operands.
8967     if (N1.getOpcode() == ISD::FP_EXTEND) {
8968       SDValue N10 = N1.getOperand(0);
8969       if (N10.getOpcode() == ISD::FMUL)
8970         return DAG.getNode(PreferredFusedOpcode, SL, VT,
8971                            DAG.getNode(ISD::FNEG, SL, VT,
8972                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
8973                                                    N10.getOperand(0))),
8974                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
8975                                        N10.getOperand(1)),
8976                            N0);
8977     }
8978 
8979     // fold (fsub (fpext (fneg (fmul, x, y))), z)
8980     //   -> (fneg (fma (fpext x), (fpext y), z))
8981     // Note: This could be removed with appropriate canonicalization of the
8982     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
8983     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
8984     // from implementing the canonicalization in visitFSUB.
8985     if (N0.getOpcode() == ISD::FP_EXTEND) {
8986       SDValue N00 = N0.getOperand(0);
8987       if (N00.getOpcode() == ISD::FNEG) {
8988         SDValue N000 = N00.getOperand(0);
8989         if (N000.getOpcode() == ISD::FMUL) {
8990           return DAG.getNode(ISD::FNEG, SL, VT,
8991                              DAG.getNode(PreferredFusedOpcode, SL, VT,
8992                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8993                                                      N000.getOperand(0)),
8994                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
8995                                                      N000.getOperand(1)),
8996                                          N1));
8997         }
8998       }
8999     }
9000 
9001     // fold (fsub (fneg (fpext (fmul, x, y))), z)
9002     //   -> (fneg (fma (fpext x)), (fpext y), z)
9003     // Note: This could be removed with appropriate canonicalization of the
9004     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
9005     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
9006     // from implementing the canonicalization in visitFSUB.
9007     if (N0.getOpcode() == ISD::FNEG) {
9008       SDValue N00 = N0.getOperand(0);
9009       if (N00.getOpcode() == ISD::FP_EXTEND) {
9010         SDValue N000 = N00.getOperand(0);
9011         if (N000.getOpcode() == ISD::FMUL) {
9012           return DAG.getNode(ISD::FNEG, SL, VT,
9013                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9014                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9015                                                      N000.getOperand(0)),
9016                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9017                                                      N000.getOperand(1)),
9018                                          N1));
9019         }
9020       }
9021     }
9022 
9023   }
9024 
9025   // More folding opportunities when target permits.
9026   if (Aggressive) {
9027     // fold (fsub (fma x, y, (fmul u, v)), z)
9028     //   -> (fma x, y (fma u, v, (fneg z)))
9029     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9030     // are currently only supported on binary nodes.
9031     if (Options.UnsafeFPMath &&
9032         N0.getOpcode() == PreferredFusedOpcode &&
9033         N0.getOperand(2).getOpcode() == ISD::FMUL &&
9034         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
9035       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9036                          N0.getOperand(0), N0.getOperand(1),
9037                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9038                                      N0.getOperand(2).getOperand(0),
9039                                      N0.getOperand(2).getOperand(1),
9040                                      DAG.getNode(ISD::FNEG, SL, VT,
9041                                                  N1)));
9042     }
9043 
9044     // fold (fsub x, (fma y, z, (fmul u, v)))
9045     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
9046     // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
9047     // are currently only supported on binary nodes.
9048     if (Options.UnsafeFPMath &&
9049         N1.getOpcode() == PreferredFusedOpcode &&
9050         N1.getOperand(2).getOpcode() == ISD::FMUL) {
9051       SDValue N20 = N1.getOperand(2).getOperand(0);
9052       SDValue N21 = N1.getOperand(2).getOperand(1);
9053       return DAG.getNode(PreferredFusedOpcode, SL, VT,
9054                          DAG.getNode(ISD::FNEG, SL, VT,
9055                                      N1.getOperand(0)),
9056                          N1.getOperand(1),
9057                          DAG.getNode(PreferredFusedOpcode, SL, VT,
9058                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
9059 
9060                                      N21, N0));
9061     }
9062 
9063     if (AllowFusion && LookThroughFPExt) {
9064       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
9065       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
9066       if (N0.getOpcode() == PreferredFusedOpcode) {
9067         SDValue N02 = N0.getOperand(2);
9068         if (N02.getOpcode() == ISD::FP_EXTEND) {
9069           SDValue N020 = N02.getOperand(0);
9070           if (N020.getOpcode() == ISD::FMUL)
9071             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9072                                N0.getOperand(0), N0.getOperand(1),
9073                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9074                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9075                                                        N020.getOperand(0)),
9076                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9077                                                        N020.getOperand(1)),
9078                                            DAG.getNode(ISD::FNEG, SL, VT,
9079                                                        N1)));
9080         }
9081       }
9082 
9083       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
9084       //   -> (fma (fpext x), (fpext y),
9085       //           (fma (fpext u), (fpext v), (fneg z)))
9086       // FIXME: This turns two single-precision and one double-precision
9087       // operation into two double-precision operations, which might not be
9088       // interesting for all targets, especially GPUs.
9089       if (N0.getOpcode() == ISD::FP_EXTEND) {
9090         SDValue N00 = N0.getOperand(0);
9091         if (N00.getOpcode() == PreferredFusedOpcode) {
9092           SDValue N002 = N00.getOperand(2);
9093           if (N002.getOpcode() == ISD::FMUL)
9094             return DAG.getNode(PreferredFusedOpcode, SL, VT,
9095                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9096                                            N00.getOperand(0)),
9097                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
9098                                            N00.getOperand(1)),
9099                                DAG.getNode(PreferredFusedOpcode, SL, VT,
9100                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9101                                                        N002.getOperand(0)),
9102                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
9103                                                        N002.getOperand(1)),
9104                                            DAG.getNode(ISD::FNEG, SL, VT,
9105                                                        N1)));
9106         }
9107       }
9108 
9109       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
9110       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
9111       if (N1.getOpcode() == PreferredFusedOpcode &&
9112         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
9113         SDValue N120 = N1.getOperand(2).getOperand(0);
9114         if (N120.getOpcode() == ISD::FMUL) {
9115           SDValue N1200 = N120.getOperand(0);
9116           SDValue N1201 = N120.getOperand(1);
9117           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9118                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
9119                              N1.getOperand(1),
9120                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9121                                          DAG.getNode(ISD::FNEG, SL, VT,
9122                                              DAG.getNode(ISD::FP_EXTEND, SL,
9123                                                          VT, N1200)),
9124                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9125                                                      N1201),
9126                                          N0));
9127         }
9128       }
9129 
9130       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
9131       //   -> (fma (fneg (fpext y)), (fpext z),
9132       //           (fma (fneg (fpext u)), (fpext v), x))
9133       // FIXME: This turns two single-precision and one double-precision
9134       // operation into two double-precision operations, which might not be
9135       // interesting for all targets, especially GPUs.
9136       if (N1.getOpcode() == ISD::FP_EXTEND &&
9137         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
9138         SDValue N100 = N1.getOperand(0).getOperand(0);
9139         SDValue N101 = N1.getOperand(0).getOperand(1);
9140         SDValue N102 = N1.getOperand(0).getOperand(2);
9141         if (N102.getOpcode() == ISD::FMUL) {
9142           SDValue N1020 = N102.getOperand(0);
9143           SDValue N1021 = N102.getOperand(1);
9144           return DAG.getNode(PreferredFusedOpcode, SL, VT,
9145                              DAG.getNode(ISD::FNEG, SL, VT,
9146                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9147                                                      N100)),
9148                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
9149                              DAG.getNode(PreferredFusedOpcode, SL, VT,
9150                                          DAG.getNode(ISD::FNEG, SL, VT,
9151                                              DAG.getNode(ISD::FP_EXTEND, SL,
9152                                                          VT, N1020)),
9153                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
9154                                                      N1021),
9155                                          N0));
9156         }
9157       }
9158     }
9159   }
9160 
9161   return SDValue();
9162 }
9163 
9164 /// Try to perform FMA combining on a given FMUL node based on the distributive
9165 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
9166 /// subtraction instead of addition).
9167 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
9168   SDValue N0 = N->getOperand(0);
9169   SDValue N1 = N->getOperand(1);
9170   EVT VT = N->getValueType(0);
9171   SDLoc SL(N);
9172 
9173   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
9174 
9175   const TargetOptions &Options = DAG.getTarget().Options;
9176 
9177   // The transforms below are incorrect when x == 0 and y == inf, because the
9178   // intermediate multiplication produces a nan.
9179   if (!Options.NoInfsFPMath)
9180     return SDValue();
9181 
9182   // Floating-point multiply-add without intermediate rounding.
9183   bool HasFMA =
9184       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
9185       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9186       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9187 
9188   // Floating-point multiply-add with intermediate rounding. This can result
9189   // in a less precise result due to the changed rounding order.
9190   bool HasFMAD = Options.UnsafeFPMath &&
9191                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9192 
9193   // No valid opcode, do not combine.
9194   if (!HasFMAD && !HasFMA)
9195     return SDValue();
9196 
9197   // Always prefer FMAD to FMA for precision.
9198   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9199   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9200 
9201   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
9202   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
9203   auto FuseFADD = [&](SDValue X, SDValue Y) {
9204     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
9205       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9206       if (XC1 && XC1->isExactlyValue(+1.0))
9207         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9208       if (XC1 && XC1->isExactlyValue(-1.0))
9209         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9210                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9211     }
9212     return SDValue();
9213   };
9214 
9215   if (SDValue FMA = FuseFADD(N0, N1))
9216     return FMA;
9217   if (SDValue FMA = FuseFADD(N1, N0))
9218     return FMA;
9219 
9220   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
9221   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
9222   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
9223   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
9224   auto FuseFSUB = [&](SDValue X, SDValue Y) {
9225     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
9226       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
9227       if (XC0 && XC0->isExactlyValue(+1.0))
9228         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9229                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9230                            Y);
9231       if (XC0 && XC0->isExactlyValue(-1.0))
9232         return DAG.getNode(PreferredFusedOpcode, SL, VT,
9233                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
9234                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9235 
9236       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
9237       if (XC1 && XC1->isExactlyValue(+1.0))
9238         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
9239                            DAG.getNode(ISD::FNEG, SL, VT, Y));
9240       if (XC1 && XC1->isExactlyValue(-1.0))
9241         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
9242     }
9243     return SDValue();
9244   };
9245 
9246   if (SDValue FMA = FuseFSUB(N0, N1))
9247     return FMA;
9248   if (SDValue FMA = FuseFSUB(N1, N0))
9249     return FMA;
9250 
9251   return SDValue();
9252 }
9253 
9254 SDValue DAGCombiner::visitFADD(SDNode *N) {
9255   SDValue N0 = N->getOperand(0);
9256   SDValue N1 = N->getOperand(1);
9257   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
9258   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
9259   EVT VT = N->getValueType(0);
9260   SDLoc DL(N);
9261   const TargetOptions &Options = DAG.getTarget().Options;
9262   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9263 
9264   // fold vector ops
9265   if (VT.isVector())
9266     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9267       return FoldedVOp;
9268 
9269   // fold (fadd c1, c2) -> c1 + c2
9270   if (N0CFP && N1CFP)
9271     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
9272 
9273   // canonicalize constant to RHS
9274   if (N0CFP && !N1CFP)
9275     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
9276 
9277   if (SDValue NewSel = foldBinOpIntoSelect(N))
9278     return NewSel;
9279 
9280   // fold (fadd A, (fneg B)) -> (fsub A, B)
9281   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9282       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
9283     return DAG.getNode(ISD::FSUB, DL, VT, N0,
9284                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9285 
9286   // fold (fadd (fneg A), B) -> (fsub B, A)
9287   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
9288       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
9289     return DAG.getNode(ISD::FSUB, DL, VT, N1,
9290                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
9291 
9292   // FIXME: Auto-upgrade the target/function-level option.
9293   if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
9294     // fold (fadd A, 0) -> A
9295     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
9296       if (N1C->isZero())
9297         return N0;
9298   }
9299 
9300   // If 'unsafe math' is enabled, fold lots of things.
9301   if (Options.UnsafeFPMath) {
9302     // No FP constant should be created after legalization as Instruction
9303     // Selection pass has a hard time dealing with FP constants.
9304     bool AllowNewConst = (Level < AfterLegalizeDAG);
9305 
9306     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
9307     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
9308         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
9309       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
9310                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
9311                                      Flags),
9312                          Flags);
9313 
9314     // If allowed, fold (fadd (fneg x), x) -> 0.0
9315     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
9316       return DAG.getConstantFP(0.0, DL, VT);
9317 
9318     // If allowed, fold (fadd x, (fneg x)) -> 0.0
9319     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
9320       return DAG.getConstantFP(0.0, DL, VT);
9321 
9322     // We can fold chains of FADD's of the same value into multiplications.
9323     // This transform is not safe in general because we are reducing the number
9324     // of rounding steps.
9325     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
9326       if (N0.getOpcode() == ISD::FMUL) {
9327         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9328         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
9329 
9330         // (fadd (fmul x, c), x) -> (fmul x, c+1)
9331         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
9332           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9333                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9334           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
9335         }
9336 
9337         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
9338         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
9339             N1.getOperand(0) == N1.getOperand(1) &&
9340             N0.getOperand(0) == N1.getOperand(0)) {
9341           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
9342                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9343           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
9344         }
9345       }
9346 
9347       if (N1.getOpcode() == ISD::FMUL) {
9348         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9349         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
9350 
9351         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
9352         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
9353           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9354                                        DAG.getConstantFP(1.0, DL, VT), Flags);
9355           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
9356         }
9357 
9358         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
9359         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
9360             N0.getOperand(0) == N0.getOperand(1) &&
9361             N1.getOperand(0) == N0.getOperand(0)) {
9362           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
9363                                        DAG.getConstantFP(2.0, DL, VT), Flags);
9364           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
9365         }
9366       }
9367 
9368       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
9369         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
9370         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
9371         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
9372             (N0.getOperand(0) == N1)) {
9373           return DAG.getNode(ISD::FMUL, DL, VT,
9374                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
9375         }
9376       }
9377 
9378       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
9379         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
9380         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
9381         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
9382             N1.getOperand(0) == N0) {
9383           return DAG.getNode(ISD::FMUL, DL, VT,
9384                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
9385         }
9386       }
9387 
9388       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
9389       if (AllowNewConst &&
9390           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
9391           N0.getOperand(0) == N0.getOperand(1) &&
9392           N1.getOperand(0) == N1.getOperand(1) &&
9393           N0.getOperand(0) == N1.getOperand(0)) {
9394         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
9395                            DAG.getConstantFP(4.0, DL, VT), Flags);
9396       }
9397     }
9398   } // enable-unsafe-fp-math
9399 
9400   // FADD -> FMA combines:
9401   if (SDValue Fused = visitFADDForFMACombine(N)) {
9402     AddToWorklist(Fused.getNode());
9403     return Fused;
9404   }
9405   return SDValue();
9406 }
9407 
9408 SDValue DAGCombiner::visitFSUB(SDNode *N) {
9409   SDValue N0 = N->getOperand(0);
9410   SDValue N1 = N->getOperand(1);
9411   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9412   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9413   EVT VT = N->getValueType(0);
9414   SDLoc DL(N);
9415   const TargetOptions &Options = DAG.getTarget().Options;
9416   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9417 
9418   // fold vector ops
9419   if (VT.isVector())
9420     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9421       return FoldedVOp;
9422 
9423   // fold (fsub c1, c2) -> c1-c2
9424   if (N0CFP && N1CFP)
9425     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
9426 
9427   if (SDValue NewSel = foldBinOpIntoSelect(N))
9428     return NewSel;
9429 
9430   // fold (fsub A, (fneg B)) -> (fadd A, B)
9431   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9432     return DAG.getNode(ISD::FADD, DL, VT, N0,
9433                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
9434 
9435   // FIXME: Auto-upgrade the target/function-level option.
9436   if (Options.NoSignedZerosFPMath  || N->getFlags()->hasNoSignedZeros()) {
9437     // (fsub 0, B) -> -B
9438     if (N0CFP && N0CFP->isZero()) {
9439       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
9440         return GetNegatedExpression(N1, DAG, LegalOperations);
9441       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9442         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
9443     }
9444   }
9445 
9446   // If 'unsafe math' is enabled, fold lots of things.
9447   if (Options.UnsafeFPMath) {
9448     // (fsub A, 0) -> A
9449     if (N1CFP && N1CFP->isZero())
9450       return N0;
9451 
9452     // (fsub x, x) -> 0.0
9453     if (N0 == N1)
9454       return DAG.getConstantFP(0.0f, DL, VT);
9455 
9456     // (fsub x, (fadd x, y)) -> (fneg y)
9457     // (fsub x, (fadd y, x)) -> (fneg y)
9458     if (N1.getOpcode() == ISD::FADD) {
9459       SDValue N10 = N1->getOperand(0);
9460       SDValue N11 = N1->getOperand(1);
9461 
9462       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
9463         return GetNegatedExpression(N11, DAG, LegalOperations);
9464 
9465       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
9466         return GetNegatedExpression(N10, DAG, LegalOperations);
9467     }
9468   }
9469 
9470   // FSUB -> FMA combines:
9471   if (SDValue Fused = visitFSUBForFMACombine(N)) {
9472     AddToWorklist(Fused.getNode());
9473     return Fused;
9474   }
9475 
9476   return SDValue();
9477 }
9478 
9479 SDValue DAGCombiner::visitFMUL(SDNode *N) {
9480   SDValue N0 = N->getOperand(0);
9481   SDValue N1 = N->getOperand(1);
9482   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9483   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9484   EVT VT = N->getValueType(0);
9485   SDLoc DL(N);
9486   const TargetOptions &Options = DAG.getTarget().Options;
9487   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9488 
9489   // fold vector ops
9490   if (VT.isVector()) {
9491     // This just handles C1 * C2 for vectors. Other vector folds are below.
9492     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9493       return FoldedVOp;
9494   }
9495 
9496   // fold (fmul c1, c2) -> c1*c2
9497   if (N0CFP && N1CFP)
9498     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
9499 
9500   // canonicalize constant to RHS
9501   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9502      !isConstantFPBuildVectorOrConstantFP(N1))
9503     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
9504 
9505   // fold (fmul A, 1.0) -> A
9506   if (N1CFP && N1CFP->isExactlyValue(1.0))
9507     return N0;
9508 
9509   if (SDValue NewSel = foldBinOpIntoSelect(N))
9510     return NewSel;
9511 
9512   if (Options.UnsafeFPMath) {
9513     // fold (fmul A, 0) -> 0
9514     if (N1CFP && N1CFP->isZero())
9515       return N1;
9516 
9517     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
9518     if (N0.getOpcode() == ISD::FMUL) {
9519       // Fold scalars or any vector constants (not just splats).
9520       // This fold is done in general by InstCombine, but extra fmul insts
9521       // may have been generated during lowering.
9522       SDValue N00 = N0.getOperand(0);
9523       SDValue N01 = N0.getOperand(1);
9524       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
9525       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
9526       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
9527 
9528       // Check 1: Make sure that the first operand of the inner multiply is NOT
9529       // a constant. Otherwise, we may induce infinite looping.
9530       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
9531         // Check 2: Make sure that the second operand of the inner multiply and
9532         // the second operand of the outer multiply are constants.
9533         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
9534             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
9535           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
9536           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
9537         }
9538       }
9539     }
9540 
9541     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
9542     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
9543     // during an early run of DAGCombiner can prevent folding with fmuls
9544     // inserted during lowering.
9545     if (N0.getOpcode() == ISD::FADD &&
9546         (N0.getOperand(0) == N0.getOperand(1)) &&
9547         N0.hasOneUse()) {
9548       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
9549       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
9550       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
9551     }
9552   }
9553 
9554   // fold (fmul X, 2.0) -> (fadd X, X)
9555   if (N1CFP && N1CFP->isExactlyValue(+2.0))
9556     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
9557 
9558   // fold (fmul X, -1.0) -> (fneg X)
9559   if (N1CFP && N1CFP->isExactlyValue(-1.0))
9560     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9561       return DAG.getNode(ISD::FNEG, DL, VT, N0);
9562 
9563   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
9564   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9565     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9566       // Both can be negated for free, check to see if at least one is cheaper
9567       // negated.
9568       if (LHSNeg == 2 || RHSNeg == 2)
9569         return DAG.getNode(ISD::FMUL, DL, VT,
9570                            GetNegatedExpression(N0, DAG, LegalOperations),
9571                            GetNegatedExpression(N1, DAG, LegalOperations),
9572                            Flags);
9573     }
9574   }
9575 
9576   // FMUL -> FMA combines:
9577   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
9578     AddToWorklist(Fused.getNode());
9579     return Fused;
9580   }
9581 
9582   return SDValue();
9583 }
9584 
9585 SDValue DAGCombiner::visitFMA(SDNode *N) {
9586   SDValue N0 = N->getOperand(0);
9587   SDValue N1 = N->getOperand(1);
9588   SDValue N2 = N->getOperand(2);
9589   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9590   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9591   EVT VT = N->getValueType(0);
9592   SDLoc DL(N);
9593   const TargetOptions &Options = DAG.getTarget().Options;
9594 
9595   // Constant fold FMA.
9596   if (isa<ConstantFPSDNode>(N0) &&
9597       isa<ConstantFPSDNode>(N1) &&
9598       isa<ConstantFPSDNode>(N2)) {
9599     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
9600   }
9601 
9602   if (Options.UnsafeFPMath) {
9603     if (N0CFP && N0CFP->isZero())
9604       return N2;
9605     if (N1CFP && N1CFP->isZero())
9606       return N2;
9607   }
9608   // TODO: The FMA node should have flags that propagate to these nodes.
9609   if (N0CFP && N0CFP->isExactlyValue(1.0))
9610     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
9611   if (N1CFP && N1CFP->isExactlyValue(1.0))
9612     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
9613 
9614   // Canonicalize (fma c, x, y) -> (fma x, c, y)
9615   if (isConstantFPBuildVectorOrConstantFP(N0) &&
9616      !isConstantFPBuildVectorOrConstantFP(N1))
9617     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
9618 
9619   // TODO: FMA nodes should have flags that propagate to the created nodes.
9620   // For now, create a Flags object for use with all unsafe math transforms.
9621   SDNodeFlags Flags;
9622   Flags.setUnsafeAlgebra(true);
9623 
9624   if (Options.UnsafeFPMath) {
9625     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
9626     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
9627         isConstantFPBuildVectorOrConstantFP(N1) &&
9628         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
9629       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9630                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
9631                                      &Flags), &Flags);
9632     }
9633 
9634     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
9635     if (N0.getOpcode() == ISD::FMUL &&
9636         isConstantFPBuildVectorOrConstantFP(N1) &&
9637         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
9638       return DAG.getNode(ISD::FMA, DL, VT,
9639                          N0.getOperand(0),
9640                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
9641                                      &Flags),
9642                          N2);
9643     }
9644   }
9645 
9646   // (fma x, 1, y) -> (fadd x, y)
9647   // (fma x, -1, y) -> (fadd (fneg x), y)
9648   if (N1CFP) {
9649     if (N1CFP->isExactlyValue(1.0))
9650       // TODO: The FMA node should have flags that propagate to this node.
9651       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
9652 
9653     if (N1CFP->isExactlyValue(-1.0) &&
9654         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
9655       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
9656       AddToWorklist(RHSNeg.getNode());
9657       // TODO: The FMA node should have flags that propagate to this node.
9658       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
9659     }
9660   }
9661 
9662   if (Options.UnsafeFPMath) {
9663     // (fma x, c, x) -> (fmul x, (c+1))
9664     if (N1CFP && N0 == N2) {
9665       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9666                          DAG.getNode(ISD::FADD, DL, VT, N1,
9667                                      DAG.getConstantFP(1.0, DL, VT), &Flags),
9668                          &Flags);
9669     }
9670 
9671     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
9672     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
9673       return DAG.getNode(ISD::FMUL, DL, VT, N0,
9674                          DAG.getNode(ISD::FADD, DL, VT, N1,
9675                                      DAG.getConstantFP(-1.0, DL, VT), &Flags),
9676                          &Flags);
9677     }
9678   }
9679 
9680   return SDValue();
9681 }
9682 
9683 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9684 // reciprocal.
9685 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
9686 // Notice that this is not always beneficial. One reason is different targets
9687 // may have different costs for FDIV and FMUL, so sometimes the cost of two
9688 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
9689 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
9690 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
9691   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
9692   const SDNodeFlags *Flags = N->getFlags();
9693   if (!UnsafeMath && !Flags->hasAllowReciprocal())
9694     return SDValue();
9695 
9696   // Skip if current node is a reciprocal.
9697   SDValue N0 = N->getOperand(0);
9698   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9699   if (N0CFP && N0CFP->isExactlyValue(1.0))
9700     return SDValue();
9701 
9702   // Exit early if the target does not want this transform or if there can't
9703   // possibly be enough uses of the divisor to make the transform worthwhile.
9704   SDValue N1 = N->getOperand(1);
9705   unsigned MinUses = TLI.combineRepeatedFPDivisors();
9706   if (!MinUses || N1->use_size() < MinUses)
9707     return SDValue();
9708 
9709   // Find all FDIV users of the same divisor.
9710   // Use a set because duplicates may be present in the user list.
9711   SetVector<SDNode *> Users;
9712   for (auto *U : N1->uses()) {
9713     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
9714       // This division is eligible for optimization only if global unsafe math
9715       // is enabled or if this division allows reciprocal formation.
9716       if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
9717         Users.insert(U);
9718     }
9719   }
9720 
9721   // Now that we have the actual number of divisor uses, make sure it meets
9722   // the minimum threshold specified by the target.
9723   if (Users.size() < MinUses)
9724     return SDValue();
9725 
9726   EVT VT = N->getValueType(0);
9727   SDLoc DL(N);
9728   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
9729   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
9730 
9731   // Dividend / Divisor -> Dividend * Reciprocal
9732   for (auto *U : Users) {
9733     SDValue Dividend = U->getOperand(0);
9734     if (Dividend != FPOne) {
9735       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
9736                                     Reciprocal, Flags);
9737       CombineTo(U, NewNode);
9738     } else if (U != Reciprocal.getNode()) {
9739       // In the absence of fast-math-flags, this user node is always the
9740       // same node as Reciprocal, but with FMF they may be different nodes.
9741       CombineTo(U, Reciprocal);
9742     }
9743   }
9744   return SDValue(N, 0);  // N was replaced.
9745 }
9746 
9747 SDValue DAGCombiner::visitFDIV(SDNode *N) {
9748   SDValue N0 = N->getOperand(0);
9749   SDValue N1 = N->getOperand(1);
9750   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9751   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9752   EVT VT = N->getValueType(0);
9753   SDLoc DL(N);
9754   const TargetOptions &Options = DAG.getTarget().Options;
9755   SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9756 
9757   // fold vector ops
9758   if (VT.isVector())
9759     if (SDValue FoldedVOp = SimplifyVBinOp(N))
9760       return FoldedVOp;
9761 
9762   // fold (fdiv c1, c2) -> c1/c2
9763   if (N0CFP && N1CFP)
9764     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
9765 
9766   if (SDValue NewSel = foldBinOpIntoSelect(N))
9767     return NewSel;
9768 
9769   if (Options.UnsafeFPMath) {
9770     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
9771     if (N1CFP) {
9772       // Compute the reciprocal 1.0 / c2.
9773       const APFloat &N1APF = N1CFP->getValueAPF();
9774       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
9775       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
9776       // Only do the transform if the reciprocal is a legal fp immediate that
9777       // isn't too nasty (eg NaN, denormal, ...).
9778       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
9779           (!LegalOperations ||
9780            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
9781            // backend)... we should handle this gracefully after Legalize.
9782            // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
9783            TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
9784            TLI.isFPImmLegal(Recip, VT)))
9785         return DAG.getNode(ISD::FMUL, DL, VT, N0,
9786                            DAG.getConstantFP(Recip, DL, VT), Flags);
9787     }
9788 
9789     // If this FDIV is part of a reciprocal square root, it may be folded
9790     // into a target-specific square root estimate instruction.
9791     if (N1.getOpcode() == ISD::FSQRT) {
9792       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
9793         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9794       }
9795     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
9796                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9797       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
9798                                           Flags)) {
9799         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
9800         AddToWorklist(RV.getNode());
9801         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9802       }
9803     } else if (N1.getOpcode() == ISD::FP_ROUND &&
9804                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9805       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
9806                                           Flags)) {
9807         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
9808         AddToWorklist(RV.getNode());
9809         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9810       }
9811     } else if (N1.getOpcode() == ISD::FMUL) {
9812       // Look through an FMUL. Even though this won't remove the FDIV directly,
9813       // it's still worthwhile to get rid of the FSQRT if possible.
9814       SDValue SqrtOp;
9815       SDValue OtherOp;
9816       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9817         SqrtOp = N1.getOperand(0);
9818         OtherOp = N1.getOperand(1);
9819       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
9820         SqrtOp = N1.getOperand(1);
9821         OtherOp = N1.getOperand(0);
9822       }
9823       if (SqrtOp.getNode()) {
9824         // We found a FSQRT, so try to make this fold:
9825         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
9826         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
9827           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
9828           AddToWorklist(RV.getNode());
9829           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9830         }
9831       }
9832     }
9833 
9834     // Fold into a reciprocal estimate and multiply instead of a real divide.
9835     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
9836       AddToWorklist(RV.getNode());
9837       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9838     }
9839   }
9840 
9841   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
9842   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9843     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9844       // Both can be negated for free, check to see if at least one is cheaper
9845       // negated.
9846       if (LHSNeg == 2 || RHSNeg == 2)
9847         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
9848                            GetNegatedExpression(N0, DAG, LegalOperations),
9849                            GetNegatedExpression(N1, DAG, LegalOperations),
9850                            Flags);
9851     }
9852   }
9853 
9854   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
9855     return CombineRepeatedDivisors;
9856 
9857   return SDValue();
9858 }
9859 
9860 SDValue DAGCombiner::visitFREM(SDNode *N) {
9861   SDValue N0 = N->getOperand(0);
9862   SDValue N1 = N->getOperand(1);
9863   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9864   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9865   EVT VT = N->getValueType(0);
9866 
9867   // fold (frem c1, c2) -> fmod(c1,c2)
9868   if (N0CFP && N1CFP)
9869     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
9870                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
9871 
9872   if (SDValue NewSel = foldBinOpIntoSelect(N))
9873     return NewSel;
9874 
9875   return SDValue();
9876 }
9877 
9878 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
9879   if (!DAG.getTarget().Options.UnsafeFPMath)
9880     return SDValue();
9881 
9882   SDValue N0 = N->getOperand(0);
9883   if (TLI.isFsqrtCheap(N0, DAG))
9884     return SDValue();
9885 
9886   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
9887   // For now, create a Flags object for use with all unsafe math transforms.
9888   SDNodeFlags Flags;
9889   Flags.setUnsafeAlgebra(true);
9890   return buildSqrtEstimate(N0, &Flags);
9891 }
9892 
9893 /// copysign(x, fp_extend(y)) -> copysign(x, y)
9894 /// copysign(x, fp_round(y)) -> copysign(x, y)
9895 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
9896   SDValue N1 = N->getOperand(1);
9897   if ((N1.getOpcode() == ISD::FP_EXTEND ||
9898        N1.getOpcode() == ISD::FP_ROUND)) {
9899     // Do not optimize out type conversion of f128 type yet.
9900     // For some targets like x86_64, configuration is changed to keep one f128
9901     // value in one SSE register, but instruction selection cannot handle
9902     // FCOPYSIGN on SSE registers yet.
9903     EVT N1VT = N1->getValueType(0);
9904     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
9905     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
9906   }
9907   return false;
9908 }
9909 
9910 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
9911   SDValue N0 = N->getOperand(0);
9912   SDValue N1 = N->getOperand(1);
9913   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9914   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9915   EVT VT = N->getValueType(0);
9916 
9917   if (N0CFP && N1CFP) // Constant fold
9918     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
9919 
9920   if (N1CFP) {
9921     const APFloat &V = N1CFP->getValueAPF();
9922     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
9923     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
9924     if (!V.isNegative()) {
9925       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
9926         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9927     } else {
9928       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9929         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9930                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
9931     }
9932   }
9933 
9934   // copysign(fabs(x), y) -> copysign(x, y)
9935   // copysign(fneg(x), y) -> copysign(x, y)
9936   // copysign(copysign(x,z), y) -> copysign(x, y)
9937   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
9938       N0.getOpcode() == ISD::FCOPYSIGN)
9939     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
9940 
9941   // copysign(x, abs(y)) -> abs(x)
9942   if (N1.getOpcode() == ISD::FABS)
9943     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9944 
9945   // copysign(x, copysign(y,z)) -> copysign(x, z)
9946   if (N1.getOpcode() == ISD::FCOPYSIGN)
9947     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
9948 
9949   // copysign(x, fp_extend(y)) -> copysign(x, y)
9950   // copysign(x, fp_round(y)) -> copysign(x, y)
9951   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
9952     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
9953 
9954   return SDValue();
9955 }
9956 
9957 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
9958   SDValue N0 = N->getOperand(0);
9959   EVT VT = N->getValueType(0);
9960   EVT OpVT = N0.getValueType();
9961 
9962   // fold (sint_to_fp c1) -> c1fp
9963   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9964       // ...but only if the target supports immediate floating-point values
9965       (!LegalOperations ||
9966        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9967     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9968 
9969   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
9970   // but UINT_TO_FP is legal on this target, try to convert.
9971   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
9972       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
9973     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
9974     if (DAG.SignBitIsZero(N0))
9975       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9976   }
9977 
9978   // The next optimizations are desirable only if SELECT_CC can be lowered.
9979   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9980     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
9981     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
9982         !VT.isVector() &&
9983         (!LegalOperations ||
9984          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9985       SDLoc DL(N);
9986       SDValue Ops[] =
9987         { N0.getOperand(0), N0.getOperand(1),
9988           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9989           N0.getOperand(2) };
9990       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9991     }
9992 
9993     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
9994     //      (select_cc x, y, 1.0, 0.0,, cc)
9995     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
9996         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
9997         (!LegalOperations ||
9998          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9999       SDLoc DL(N);
10000       SDValue Ops[] =
10001         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10002           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10003           N0.getOperand(0).getOperand(2) };
10004       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10005     }
10006   }
10007 
10008   return SDValue();
10009 }
10010 
10011 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10012   SDValue N0 = N->getOperand(0);
10013   EVT VT = N->getValueType(0);
10014   EVT OpVT = N0.getValueType();
10015 
10016   // fold (uint_to_fp c1) -> c1fp
10017   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10018       // ...but only if the target supports immediate floating-point values
10019       (!LegalOperations ||
10020        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
10021     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10022 
10023   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10024   // but SINT_TO_FP is legal on this target, try to convert.
10025   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10026       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10027     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10028     if (DAG.SignBitIsZero(N0))
10029       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10030   }
10031 
10032   // The next optimizations are desirable only if SELECT_CC can be lowered.
10033   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10034     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10035 
10036     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10037         (!LegalOperations ||
10038          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
10039       SDLoc DL(N);
10040       SDValue Ops[] =
10041         { N0.getOperand(0), N0.getOperand(1),
10042           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10043           N0.getOperand(2) };
10044       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10045     }
10046   }
10047 
10048   return SDValue();
10049 }
10050 
10051 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10052 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10053   SDValue N0 = N->getOperand(0);
10054   EVT VT = N->getValueType(0);
10055 
10056   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10057     return SDValue();
10058 
10059   SDValue Src = N0.getOperand(0);
10060   EVT SrcVT = Src.getValueType();
10061   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10062   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10063 
10064   // We can safely assume the conversion won't overflow the output range,
10065   // because (for example) (uint8_t)18293.f is undefined behavior.
10066 
10067   // Since we can assume the conversion won't overflow, our decision as to
10068   // whether the input will fit in the float should depend on the minimum
10069   // of the input range and output range.
10070 
10071   // This means this is also safe for a signed input and unsigned output, since
10072   // a negative input would lead to undefined behavior.
10073   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10074   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10075   unsigned ActualSize = std::min(InputSize, OutputSize);
10076   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10077 
10078   // We can only fold away the float conversion if the input range can be
10079   // represented exactly in the float range.
10080   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10081     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10082       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10083                                                        : ISD::ZERO_EXTEND;
10084       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10085     }
10086     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10087       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10088     return DAG.getBitcast(VT, Src);
10089   }
10090   return SDValue();
10091 }
10092 
10093 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10094   SDValue N0 = N->getOperand(0);
10095   EVT VT = N->getValueType(0);
10096 
10097   // fold (fp_to_sint c1fp) -> c1
10098   if (isConstantFPBuildVectorOrConstantFP(N0))
10099     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10100 
10101   return FoldIntToFPToInt(N, DAG);
10102 }
10103 
10104 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10105   SDValue N0 = N->getOperand(0);
10106   EVT VT = N->getValueType(0);
10107 
10108   // fold (fp_to_uint c1fp) -> c1
10109   if (isConstantFPBuildVectorOrConstantFP(N0))
10110     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10111 
10112   return FoldIntToFPToInt(N, DAG);
10113 }
10114 
10115 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
10116   SDValue N0 = N->getOperand(0);
10117   SDValue N1 = N->getOperand(1);
10118   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10119   EVT VT = N->getValueType(0);
10120 
10121   // fold (fp_round c1fp) -> c1fp
10122   if (N0CFP)
10123     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
10124 
10125   // fold (fp_round (fp_extend x)) -> x
10126   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
10127     return N0.getOperand(0);
10128 
10129   // fold (fp_round (fp_round x)) -> (fp_round x)
10130   if (N0.getOpcode() == ISD::FP_ROUND) {
10131     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
10132     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
10133 
10134     // Skip this folding if it results in an fp_round from f80 to f16.
10135     //
10136     // f80 to f16 always generates an expensive (and as yet, unimplemented)
10137     // libcall to __truncxfhf2 instead of selecting native f16 conversion
10138     // instructions from f32 or f64.  Moreover, the first (value-preserving)
10139     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
10140     // x86.
10141     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
10142       return SDValue();
10143 
10144     // If the first fp_round isn't a value preserving truncation, it might
10145     // introduce a tie in the second fp_round, that wouldn't occur in the
10146     // single-step fp_round we want to fold to.
10147     // In other words, double rounding isn't the same as rounding.
10148     // Also, this is a value preserving truncation iff both fp_round's are.
10149     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
10150       SDLoc DL(N);
10151       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
10152                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
10153     }
10154   }
10155 
10156   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
10157   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
10158     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
10159                               N0.getOperand(0), N1);
10160     AddToWorklist(Tmp.getNode());
10161     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
10162                        Tmp, N0.getOperand(1));
10163   }
10164 
10165   return SDValue();
10166 }
10167 
10168 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10169   SDValue N0 = N->getOperand(0);
10170   EVT VT = N->getValueType(0);
10171   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10172   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10173 
10174   // fold (fp_round_inreg c1fp) -> c1fp
10175   if (N0CFP && isTypeLegal(EVT)) {
10176     SDLoc DL(N);
10177     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10178     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10179   }
10180 
10181   return SDValue();
10182 }
10183 
10184 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
10185   SDValue N0 = N->getOperand(0);
10186   EVT VT = N->getValueType(0);
10187 
10188   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
10189   if (N->hasOneUse() &&
10190       N->use_begin()->getOpcode() == ISD::FP_ROUND)
10191     return SDValue();
10192 
10193   // fold (fp_extend c1fp) -> c1fp
10194   if (isConstantFPBuildVectorOrConstantFP(N0))
10195     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
10196 
10197   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
10198   if (N0.getOpcode() == ISD::FP16_TO_FP &&
10199       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
10200     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
10201 
10202   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
10203   // value of X.
10204   if (N0.getOpcode() == ISD::FP_ROUND
10205       && N0.getConstantOperandVal(1) == 1) {
10206     SDValue In = N0.getOperand(0);
10207     if (In.getValueType() == VT) return In;
10208     if (VT.bitsLT(In.getValueType()))
10209       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
10210                          In, N0.getOperand(1));
10211     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
10212   }
10213 
10214   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
10215   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10216        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10217     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10218     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10219                                      LN0->getChain(),
10220                                      LN0->getBasePtr(), N0.getValueType(),
10221                                      LN0->getMemOperand());
10222     CombineTo(N, ExtLoad);
10223     CombineTo(N0.getNode(),
10224               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
10225                           N0.getValueType(), ExtLoad,
10226                           DAG.getIntPtrConstant(1, SDLoc(N0))),
10227               ExtLoad.getValue(1));
10228     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10229   }
10230 
10231   return SDValue();
10232 }
10233 
10234 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10235   SDValue N0 = N->getOperand(0);
10236   EVT VT = N->getValueType(0);
10237 
10238   // fold (fceil c1) -> fceil(c1)
10239   if (isConstantFPBuildVectorOrConstantFP(N0))
10240     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10241 
10242   return SDValue();
10243 }
10244 
10245 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10246   SDValue N0 = N->getOperand(0);
10247   EVT VT = N->getValueType(0);
10248 
10249   // fold (ftrunc c1) -> ftrunc(c1)
10250   if (isConstantFPBuildVectorOrConstantFP(N0))
10251     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10252 
10253   return SDValue();
10254 }
10255 
10256 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
10257   SDValue N0 = N->getOperand(0);
10258   EVT VT = N->getValueType(0);
10259 
10260   // fold (ffloor c1) -> ffloor(c1)
10261   if (isConstantFPBuildVectorOrConstantFP(N0))
10262     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
10263 
10264   return SDValue();
10265 }
10266 
10267 // FIXME: FNEG and FABS have a lot in common; refactor.
10268 SDValue DAGCombiner::visitFNEG(SDNode *N) {
10269   SDValue N0 = N->getOperand(0);
10270   EVT VT = N->getValueType(0);
10271 
10272   // Constant fold FNEG.
10273   if (isConstantFPBuildVectorOrConstantFP(N0))
10274     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
10275 
10276   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
10277                          &DAG.getTarget().Options))
10278     return GetNegatedExpression(N0, DAG, LegalOperations);
10279 
10280   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
10281   // constant pool values.
10282   if (!TLI.isFNegFree(VT) &&
10283       N0.getOpcode() == ISD::BITCAST &&
10284       N0.getNode()->hasOneUse()) {
10285     SDValue Int = N0.getOperand(0);
10286     EVT IntVT = Int.getValueType();
10287     if (IntVT.isInteger() && !IntVT.isVector()) {
10288       APInt SignMask;
10289       if (N0.getValueType().isVector()) {
10290         // For a vector, get a mask such as 0x80... per scalar element
10291         // and splat it.
10292         SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
10293         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10294       } else {
10295         // For a scalar, just generate 0x80...
10296         SignMask = APInt::getSignBit(IntVT.getSizeInBits());
10297       }
10298       SDLoc DL0(N0);
10299       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
10300                         DAG.getConstant(SignMask, DL0, IntVT));
10301       AddToWorklist(Int.getNode());
10302       return DAG.getBitcast(VT, Int);
10303     }
10304   }
10305 
10306   // (fneg (fmul c, x)) -> (fmul -c, x)
10307   if (N0.getOpcode() == ISD::FMUL &&
10308       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
10309     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
10310     if (CFP1) {
10311       APFloat CVal = CFP1->getValueAPF();
10312       CVal.changeSign();
10313       if (Level >= AfterLegalizeDAG &&
10314           (TLI.isFPImmLegal(CVal, VT) ||
10315            TLI.isOperationLegal(ISD::ConstantFP, VT)))
10316         return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
10317                            DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10318                                        N0.getOperand(1)),
10319                            &cast<BinaryWithFlagsSDNode>(N0)->Flags);
10320     }
10321   }
10322 
10323   return SDValue();
10324 }
10325 
10326 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
10327   SDValue N0 = N->getOperand(0);
10328   SDValue N1 = N->getOperand(1);
10329   EVT VT = N->getValueType(0);
10330   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10331   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10332 
10333   if (N0CFP && N1CFP) {
10334     const APFloat &C0 = N0CFP->getValueAPF();
10335     const APFloat &C1 = N1CFP->getValueAPF();
10336     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
10337   }
10338 
10339   // Canonicalize to constant on RHS.
10340   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10341      !isConstantFPBuildVectorOrConstantFP(N1))
10342     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
10343 
10344   return SDValue();
10345 }
10346 
10347 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
10348   SDValue N0 = N->getOperand(0);
10349   SDValue N1 = N->getOperand(1);
10350   EVT VT = N->getValueType(0);
10351   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10352   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10353 
10354   if (N0CFP && N1CFP) {
10355     const APFloat &C0 = N0CFP->getValueAPF();
10356     const APFloat &C1 = N1CFP->getValueAPF();
10357     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
10358   }
10359 
10360   // Canonicalize to constant on RHS.
10361   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10362      !isConstantFPBuildVectorOrConstantFP(N1))
10363     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
10364 
10365   return SDValue();
10366 }
10367 
10368 SDValue DAGCombiner::visitFABS(SDNode *N) {
10369   SDValue N0 = N->getOperand(0);
10370   EVT VT = N->getValueType(0);
10371 
10372   // fold (fabs c1) -> fabs(c1)
10373   if (isConstantFPBuildVectorOrConstantFP(N0))
10374     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10375 
10376   // fold (fabs (fabs x)) -> (fabs x)
10377   if (N0.getOpcode() == ISD::FABS)
10378     return N->getOperand(0);
10379 
10380   // fold (fabs (fneg x)) -> (fabs x)
10381   // fold (fabs (fcopysign x, y)) -> (fabs x)
10382   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
10383     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
10384 
10385   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
10386   // constant pool values.
10387   if (!TLI.isFAbsFree(VT) &&
10388       N0.getOpcode() == ISD::BITCAST &&
10389       N0.getNode()->hasOneUse()) {
10390     SDValue Int = N0.getOperand(0);
10391     EVT IntVT = Int.getValueType();
10392     if (IntVT.isInteger() && !IntVT.isVector()) {
10393       APInt SignMask;
10394       if (N0.getValueType().isVector()) {
10395         // For a vector, get a mask such as 0x7f... per scalar element
10396         // and splat it.
10397         SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
10398         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
10399       } else {
10400         // For a scalar, just generate 0x7f...
10401         SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
10402       }
10403       SDLoc DL(N0);
10404       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
10405                         DAG.getConstant(SignMask, DL, IntVT));
10406       AddToWorklist(Int.getNode());
10407       return DAG.getBitcast(N->getValueType(0), Int);
10408     }
10409   }
10410 
10411   return SDValue();
10412 }
10413 
/// Combine a BRCOND node. Operand 0 is the chain, operand 1 the branch
/// condition, and operand 2 is forwarded unchanged as the last operand of any
/// branch node created here.
///
/// Returns a new branch node when one is built, SDValue(N, 0) when N was
/// already handled in place, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    // The setcc's operands (LHS, RHS, condition code) are reordered into the
    // BR_CC layout: chain, condition code, LHS, RHS, destination.
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  // The condition is a single-use SRL, or a single-use TRUNCATE of a
  // single-use SRL.
  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc ne %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDLoc DL(N);
          SDValue SetCC =
            DAG.getSetCC(DL,
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      if (SDValue Tmp = visitXOR(TheXor)) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    // Only rewrite the xor as a setcc when neither operand is itself a setcc.
    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): as written this requires Op0 to satisfy isOneConstant()
      // and also to be an XOR node, which looks unsatisfiable, so the SETEQ
      // form may never be produced here — confirm intent against the
      // "br(xor(xor(x,y), 1))" pattern described above.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}
10558 
10559 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
10560 //
10561 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
10562   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
10563   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
10564 
10565   // If N is a constant we could fold this into a fallthrough or unconditional
10566   // branch. However that doesn't happen very often in normal code, because
10567   // Instcombine/SimplifyCFG should have handled the available opportunities.
10568   // If we did this folding here, it would be necessary to update the
10569   // MachineBasicBlock CFG, which is awkward.
10570 
10571   // Use SimplifySetCC to simplify SETCC's.
10572   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
10573                                CondLHS, CondRHS, CC->get(), SDLoc(N),
10574                                false);
10575   if (Simp.getNode()) AddToWorklist(Simp.getNode());
10576 
10577   // fold to a simpler setcc
10578   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
10579     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
10580                        N->getOperand(0), Simp.getOperand(2),
10581                        Simp.getOperand(0), Simp.getOperand(1),
10582                        N->getOperand(4));
10583 
10584   return SDValue();
10585 }
10586 
10587 /// Return true if 'Use' is a load or a store that uses N as its base pointer
10588 /// and that N may be folded in the load / store addressing mode.
10589 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
10590                                     SelectionDAG &DAG,
10591                                     const TargetLowering &TLI) {
10592   EVT VT;
10593   unsigned AS;
10594 
10595   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
10596     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
10597       return false;
10598     VT = LD->getMemoryVT();
10599     AS = LD->getAddressSpace();
10600   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
10601     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
10602       return false;
10603     VT = ST->getMemoryVT();
10604     AS = ST->getAddressSpace();
10605   } else
10606     return false;
10607 
10608   TargetLowering::AddrMode AM;
10609   if (N->getOpcode() == ISD::ADD) {
10610     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10611     if (Offset)
10612       // [reg +/- imm]
10613       AM.BaseOffs = Offset->getSExtValue();
10614     else
10615       // [reg +/- reg]
10616       AM.Scale = 1;
10617   } else if (N->getOpcode() == ISD::SUB) {
10618     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
10619     if (Offset)
10620       // [reg +/- imm]
10621       AM.BaseOffs = -Offset->getSExtValue();
10622     else
10623       // [reg +/- reg]
10624       AM.Scale = 1;
10625   } else
10626     return false;
10627 
10628   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
10629                                    VT.getTypeForEVT(*DAG.getContext()), AS);
10630 }
10631 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// \returns true if \p N was replaced by a pre-indexed load/store (in which
/// case \p N and its old address node have been deleted), false if the DAG was
/// left untouched.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Only attempted once the combiner level has reached AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store whose memory type supports at least
  // one of the PRE_INC / PRE_DEC indexed forms on this target.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr/Offset are temporarily exchanged so the
  // checks below see the non-constant value as BasePtr; they are swapped back
  // before the indexed node is created.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // A RegisterSDNode base is rejected here as well.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // The same Visited/Worklist pair is shared by every query below, all of
  // which are seeded from N.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // Any use that cannot be rewritten abandons the whole list (clear + break).
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Users for which the predecessor query succeeds are left alone.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The user's other operand (index 0 or 1, whichever is not this use)
      // must be a constant of the same type as Offset.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the order the target reported before building the indexed node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // All checks passed: build the indexed node from N.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Redirect N's results to the indexed node. For a load, N's results 0 and 1
  // map to results 0 and 2 of the new node; for a store, N's single result
  // maps to result 1. The remaining result (1 for loads, 0 for stores) is
  // used below as the replacement for Ptr.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-enter the swapped view so that BasePtr again names the value whose
  // uses were collected in OtherUses and Offset names the constant.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Work out which operand of the user is the constant offset and which is
    // BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs of the four coefficients in the equations above.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    // (y0 * y1) decides whether t1 is added to or subtracted from the constant.
    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
10859 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store and redirects all of its uses
/// to the new load/store.
///
/// \returns true if \p N was replaced by a post-indexed load/store (in which
/// case \p N and the folded add/sub have been deleted), false if the DAG was
/// left untouched.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Only attempted once the combiner level has reached AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store whose memory type supports at least
  // one of the POST_INC / POST_DEC indexed forms on this target.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The address must have another user besides N for there to be an add/sub
  // to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Consider each ADD/SUB user of the address as a candidate to fold; the
  // first candidate that passes every check is used.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Frame-index and register bases are never turned into indexed forms.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          // RealUse becomes true as soon as one user of this add/sub cannot
          // absorb it into its addressing mode.
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Redirect N's results to the indexed node. For a load, N's results 0
        // and 1 map to results 0 and 2 of the new node; for a store, N's
        // single result maps to result 1. The remaining result (1 for loads,
        // 0 for stores) replaces Op below.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Op with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
10979 
10980 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
10981 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
10982   ISD::MemIndexedMode AM = LD->getAddressingMode();
10983   assert(AM != ISD::UNINDEXED);
10984   SDValue BP = LD->getOperand(1);
10985   SDValue Inc = LD->getOperand(2);
10986 
10987   // Some backends use TargetConstants for load offsets, but don't expect
10988   // TargetConstants in general ADD nodes. We can convert these constants into
10989   // regular Constants (if the constant is not opaque).
10990   assert((Inc.getOpcode() != ISD::TargetConstant ||
10991           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
10992          "Cannot split out indexing using opaque target constants");
10993   if (Inc.getOpcode() == ISD::TargetConstant) {
10994     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
10995     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
10996                           ConstInc->getValueType(0));
10997   }
10998 
10999   unsigned Opc =
11000       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11001   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11002 }
11003 
11004 SDValue DAGCombiner::visitLOAD(SDNode *N) {
11005   LoadSDNode *LD  = cast<LoadSDNode>(N);
11006   SDValue Chain = LD->getChain();
11007   SDValue Ptr   = LD->getBasePtr();
11008 
11009   // If load is not volatile and there are no uses of the loaded value (and
11010   // the updated indexed value in case of indexed loads), change uses of the
11011   // chain value into uses of the chain input (i.e. delete the dead load).
11012   if (!LD->isVolatile()) {
11013     if (N->getValueType(1) == MVT::Other) {
11014       // Unindexed loads.
11015       if (!N->hasAnyUseOfValue(0)) {
11016         // It's not safe to use the two value CombineTo variant here. e.g.
11017         // v1, chain2 = load chain1, loc
11018         // v2, chain3 = load chain2, loc
11019         // v3         = add v2, c
11020         // Now we replace use of chain2 with chain1.  This makes the second load
11021         // isomorphic to the one we are deleting, and thus makes this load live.
11022         DEBUG(dbgs() << "\nReplacing.6 ";
11023               N->dump(&DAG);
11024               dbgs() << "\nWith chain: ";
11025               Chain.getNode()->dump(&DAG);
11026               dbgs() << "\n");
11027         WorklistRemover DeadNodes(*this);
11028         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11029         AddUsersToWorklist(Chain.getNode());
11030         if (N->use_empty())
11031           deleteAndRecombine(N);
11032 
11033         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11034       }
11035     } else {
11036       // Indexed loads.
11037       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
11038 
11039       // If this load has an opaque TargetConstant offset, then we cannot split
11040       // the indexing into an add/sub directly (that TargetConstant may not be
11041       // valid for a different type of node, and we cannot convert an opaque
11042       // target constant into a regular constant).
11043       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
11044                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
11045 
11046       if (!N->hasAnyUseOfValue(0) &&
11047           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
11048         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
11049         SDValue Index;
11050         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
11051           Index = SplitIndexingFromLoad(LD);
11052           // Try to fold the base pointer arithmetic into subsequent loads and
11053           // stores.
11054           AddUsersToWorklist(N);
11055         } else
11056           Index = DAG.getUNDEF(N->getValueType(1));
11057         DEBUG(dbgs() << "\nReplacing.7 ";
11058               N->dump(&DAG);
11059               dbgs() << "\nWith: ";
11060               Undef.getNode()->dump(&DAG);
11061               dbgs() << " and 2 other values\n");
11062         WorklistRemover DeadNodes(*this);
11063         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
11064         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
11065         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
11066         deleteAndRecombine(N);
11067         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11068       }
11069     }
11070   }
11071 
11072   // If this load is directly stored, replace the load value with the stored
11073   // value.
11074   // TODO: Handle store large -> read small portion.
11075   // TODO: Handle TRUNCSTORE/LOADEXT
11076   if (OptLevel != CodeGenOpt::None &&
11077       ISD::isNormalLoad(N) && !LD->isVolatile()) {
11078     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
11079       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
11080       if (PrevST->getBasePtr() == Ptr &&
11081           PrevST->getValue().getValueType() == N->getValueType(0))
11082         return CombineTo(N, PrevST->getOperand(1), Chain);
11083     }
11084   }
11085 
11086   // Try to infer better alignment information than the load already has.
11087   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
11088     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11089       if (Align > LD->getMemOperand()->getBaseAlignment()) {
11090         SDValue NewLoad = DAG.getExtLoad(
11091             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
11092             LD->getPointerInfo(), LD->getMemoryVT(), Align,
11093             LD->getMemOperand()->getFlags(), LD->getAAInfo());
11094         if (NewLoad.getNode() != N)
11095           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
11096       }
11097     }
11098   }
11099 
11100   if (LD->isUnindexed()) {
11101     // Walk up chain skipping non-aliasing memory nodes.
11102     SDValue BetterChain = FindBetterChain(N, Chain);
11103 
11104     // If there is a better chain.
11105     if (Chain != BetterChain) {
11106       SDValue ReplLoad;
11107 
11108       // Replace the chain to void dependency.
11109       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
11110         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
11111                                BetterChain, Ptr, LD->getMemOperand());
11112       } else {
11113         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
11114                                   LD->getValueType(0),
11115                                   BetterChain, Ptr, LD->getMemoryVT(),
11116                                   LD->getMemOperand());
11117       }
11118 
11119       // Create token factor to keep old chain connected.
11120       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11121                                   MVT::Other, Chain, ReplLoad.getValue(1));
11122 
11123       // Make sure the new and old chains are cleaned up.
11124       AddToWorklist(Token.getNode());
11125 
11126       // Replace uses with load result and token factor. Don't add users
11127       // to work list.
11128       return CombineTo(N, ReplLoad.getValue(0), Token, false);
11129     }
11130   }
11131 
11132   // Try transforming N to an indexed load.
11133   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11134     return SDValue(N, 0);
11135 
11136   // Try to slice up N to more direct loads if the slices are mapped to
11137   // different register banks or pairing can take place.
11138   if (SliceUpLoad(N))
11139     return SDValue(N, 0);
11140 
11141   return SDValue();
11142 }
11143 
11144 namespace {
11145 /// \brief Helper structure used to slice a load in smaller loads.
11146 /// Basically a slice is obtained from the following sequence:
11147 /// Origin = load Ty1, Base
11148 /// Shift = srl Ty1 Origin, CstTy Amount
11149 /// Inst = trunc Shift to Ty2
11150 ///
11151 /// Then, it will be rewriten into:
11152 /// Slice = load SliceTy, Base + SliceOffset
11153 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
11154 ///
11155 /// SliceTy is deduced from the number of bits that are actually used to
11156 /// build Inst.
11157 struct LoadedSlice {
11158   /// \brief Helper structure used to compute the cost of a slice.
11159   struct Cost {
11160     /// Are we optimizing for code size.
11161     bool ForCodeSize;
11162     /// Various cost.
11163     unsigned Loads;
11164     unsigned Truncates;
11165     unsigned CrossRegisterBanksCopies;
11166     unsigned ZExts;
11167     unsigned Shift;
11168 
11169     Cost(bool ForCodeSize = false)
11170         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
11171           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
11172 
11173     /// \brief Get the cost of one isolated slice.
11174     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
11175         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
11176           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
11177       EVT TruncType = LS.Inst->getValueType(0);
11178       EVT LoadedType = LS.getLoadedType();
11179       if (TruncType != LoadedType &&
11180           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
11181         ZExts = 1;
11182     }
11183 
11184     /// \brief Account for slicing gain in the current cost.
11185     /// Slicing provide a few gains like removing a shift or a
11186     /// truncate. This method allows to grow the cost of the original
11187     /// load with the gain from this slice.
11188     void addSliceGain(const LoadedSlice &LS) {
11189       // Each slice saves a truncate.
11190       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
11191       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
11192                               LS.Inst->getValueType(0)))
11193         ++Truncates;
11194       // If there is a shift amount, this slice gets rid of it.
11195       if (LS.Shift)
11196         ++Shift;
11197       // If this slice can merge a cross register bank copy, account for it.
11198       if (LS.canMergeExpensiveCrossRegisterBankCopy())
11199         ++CrossRegisterBanksCopies;
11200     }
11201 
11202     Cost &operator+=(const Cost &RHS) {
11203       Loads += RHS.Loads;
11204       Truncates += RHS.Truncates;
11205       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
11206       ZExts += RHS.ZExts;
11207       Shift += RHS.Shift;
11208       return *this;
11209     }
11210 
11211     bool operator==(const Cost &RHS) const {
11212       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
11213              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
11214              ZExts == RHS.ZExts && Shift == RHS.Shift;
11215     }
11216 
11217     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
11218 
11219     bool operator<(const Cost &RHS) const {
11220       // Assume cross register banks copies are as expensive as loads.
11221       // FIXME: Do we want some more target hooks?
11222       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
11223       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
11224       // Unless we are optimizing for code size, consider the
11225       // expensive operation first.
11226       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
11227         return ExpensiveOpsLHS < ExpensiveOpsRHS;
11228       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
11229              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
11230     }
11231 
11232     bool operator>(const Cost &RHS) const { return RHS < *this; }
11233 
11234     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
11235 
11236     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
11237   };
11238   // The last instruction that represent the slice. This should be a
11239   // truncate instruction.
11240   SDNode *Inst;
11241   // The original load instruction.
11242   LoadSDNode *Origin;
11243   // The right shift amount in bits from the original load.
11244   unsigned Shift;
11245   // The DAG from which Origin came from.
11246   // This is used to get some contextual information about legal types, etc.
11247   SelectionDAG *DAG;
11248 
11249   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
11250               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
11251       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
11252 
11253   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
11254   /// \return Result is \p BitWidth and has used bits set to 1 and
11255   ///         not used bits set to 0.
11256   APInt getUsedBits() const {
11257     // Reproduce the trunc(lshr) sequence:
11258     // - Start from the truncated value.
11259     // - Zero extend to the desired bit width.
11260     // - Shift left.
11261     assert(Origin && "No original load to compare against.");
11262     unsigned BitWidth = Origin->getValueSizeInBits(0);
11263     assert(Inst && "This slice is not bound to an instruction");
11264     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
11265            "Extracted slice is bigger than the whole type!");
11266     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
11267     UsedBits.setAllBits();
11268     UsedBits = UsedBits.zext(BitWidth);
11269     UsedBits <<= Shift;
11270     return UsedBits;
11271   }
11272 
11273   /// \brief Get the size of the slice to be loaded in bytes.
11274   unsigned getLoadedSize() const {
11275     unsigned SliceSize = getUsedBits().countPopulation();
11276     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
11277     return SliceSize / 8;
11278   }
11279 
11280   /// \brief Get the type that will be loaded for this slice.
11281   /// Note: This may not be the final type for the slice.
11282   EVT getLoadedType() const {
11283     assert(DAG && "Missing context");
11284     LLVMContext &Ctxt = *DAG->getContext();
11285     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
11286   }
11287 
11288   /// \brief Get the alignment of the load used for this slice.
11289   unsigned getAlignment() const {
11290     unsigned Alignment = Origin->getAlignment();
11291     unsigned Offset = getOffsetFromBase();
11292     if (Offset != 0)
11293       Alignment = MinAlign(Alignment, Alignment + Offset);
11294     return Alignment;
11295   }
11296 
11297   /// \brief Check if this slice can be rewritten with legal operations.
11298   bool isLegal() const {
11299     // An invalid slice is not legal.
11300     if (!Origin || !Inst || !DAG)
11301       return false;
11302 
11303     // Offsets are for indexed load only, we do not handle that.
11304     if (!Origin->getOffset().isUndef())
11305       return false;
11306 
11307     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11308 
11309     // Check that the type is legal.
11310     EVT SliceType = getLoadedType();
11311     if (!TLI.isTypeLegal(SliceType))
11312       return false;
11313 
11314     // Check that the load is legal for this type.
11315     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
11316       return false;
11317 
11318     // Check that the offset can be computed.
11319     // 1. Check its type.
11320     EVT PtrType = Origin->getBasePtr().getValueType();
11321     if (PtrType == MVT::Untyped || PtrType.isExtended())
11322       return false;
11323 
11324     // 2. Check that it fits in the immediate.
11325     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
11326       return false;
11327 
11328     // 3. Check that the computation is legal.
11329     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
11330       return false;
11331 
11332     // Check that the zext is legal if it needs one.
11333     EVT TruncateType = Inst->getValueType(0);
11334     if (TruncateType != SliceType &&
11335         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
11336       return false;
11337 
11338     return true;
11339   }
11340 
11341   /// \brief Get the offset in bytes of this slice in the original chunk of
11342   /// bits.
11343   /// \pre DAG != nullptr.
11344   uint64_t getOffsetFromBase() const {
11345     assert(DAG && "Missing context.");
11346     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
11347     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
11348     uint64_t Offset = Shift / 8;
11349     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
11350     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
11351            "The size of the original loaded type is not a multiple of a"
11352            " byte.");
11353     // If Offset is bigger than TySizeInBytes, it means we are loading all
11354     // zeros. This should have been optimized before in the process.
11355     assert(TySizeInBytes > Offset &&
11356            "Invalid shift amount for given loaded size");
11357     if (IsBigEndian)
11358       Offset = TySizeInBytes - Offset - getLoadedSize();
11359     return Offset;
11360   }
11361 
11362   /// \brief Generate the sequence of instructions to load the slice
11363   /// represented by this object and redirect the uses of this slice to
11364   /// this new sequence of instructions.
11365   /// \pre this->Inst && this->Origin are valid Instructions and this
11366   /// object passed the legal check: LoadedSlice::isLegal returned true.
11367   /// \return The last instruction of the sequence used to load the slice.
11368   SDValue loadSlice() const {
11369     assert(Inst && Origin && "Unable to replace a non-existing slice.");
11370     const SDValue &OldBaseAddr = Origin->getBasePtr();
11371     SDValue BaseAddr = OldBaseAddr;
11372     // Get the offset in that chunk of bytes w.r.t. the endianness.
11373     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
11374     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
11375     if (Offset) {
11376       // BaseAddr = BaseAddr + Offset.
11377       EVT ArithType = BaseAddr.getValueType();
11378       SDLoc DL(Origin);
11379       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
11380                               DAG->getConstant(Offset, DL, ArithType));
11381     }
11382 
11383     // Create the type of the loaded slice according to its size.
11384     EVT SliceType = getLoadedType();
11385 
11386     // Create the load for the slice.
11387     SDValue LastInst =
11388         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
11389                      Origin->getPointerInfo().getWithOffset(Offset),
11390                      getAlignment(), Origin->getMemOperand()->getFlags());
11391     // If the final type is not the same as the loaded type, this means that
11392     // we have to pad with zero. Create a zero extend for that.
11393     EVT FinalType = Inst->getValueType(0);
11394     if (SliceType != FinalType)
11395       LastInst =
11396           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
11397     return LastInst;
11398   }
11399 
11400   /// \brief Check if this slice can be merged with an expensive cross register
11401   /// bank copy. E.g.,
11402   /// i = load i32
11403   /// f = bitcast i32 i to float
11404   bool canMergeExpensiveCrossRegisterBankCopy() const {
11405     if (!Inst || !Inst->hasOneUse())
11406       return false;
11407     SDNode *Use = *Inst->use_begin();
11408     if (Use->getOpcode() != ISD::BITCAST)
11409       return false;
11410     assert(DAG && "Missing context");
11411     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
11412     EVT ResVT = Use->getValueType(0);
11413     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
11414     const TargetRegisterClass *ArgRC =
11415         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
11416     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
11417       return false;
11418 
11419     // At this point, we know that we perform a cross-register-bank copy.
11420     // Check if it is expensive.
11421     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
11422     // Assume bitcasts are cheap, unless both register classes do not
11423     // explicitly share a common sub class.
11424     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
11425       return false;
11426 
11427     // Check if it will be merged with the load.
11428     // 1. Check the alignment constraint.
11429     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
11430         ResVT.getTypeForEVT(*DAG->getContext()));
11431 
11432     if (RequiredAlignment > getAlignment())
11433       return false;
11434 
11435     // 2. Check that the load is a legal operation for that type.
11436     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
11437       return false;
11438 
11439     // 3. Check that we do not have a zext in the way.
11440     if (Inst->getValueType(0) != getLoadedType())
11441       return false;
11442 
11443     return true;
11444   }
11445 };
11446 }
11447 
11448 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
11449 /// \p UsedBits looks like 0..0 1..1 0..0.
11450 static bool areUsedBitsDense(const APInt &UsedBits) {
11451   // If all the bits are one, this is dense!
11452   if (UsedBits.isAllOnesValue())
11453     return true;
11454 
11455   // Get rid of the unused bits on the right.
11456   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
11457   // Get rid of the unused bits on the left.
11458   if (NarrowedUsedBits.countLeadingZeros())
11459     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
11460   // Check that the chunk of bits is completely used.
11461   return NarrowedUsedBits.isAllOnesValue();
11462 }
11463 
11464 /// \brief Check whether or not \p First and \p Second are next to each other
11465 /// in memory. This means that there is no hole between the bits loaded
11466 /// by \p First and the bits loaded by \p Second.
11467 static bool areSlicesNextToEachOther(const LoadedSlice &First,
11468                                      const LoadedSlice &Second) {
11469   assert(First.Origin == Second.Origin && First.Origin &&
11470          "Unable to match different memory origins.");
11471   APInt UsedBits = First.getUsedBits();
11472   assert((UsedBits & Second.getUsedBits()) == 0 &&
11473          "Slices are not supposed to overlap.");
11474   UsedBits |= Second.getUsedBits();
11475   return areUsedBitsDense(UsedBits);
11476 }
11477 
11478 /// \brief Adjust the \p GlobalLSCost according to the target
11479 /// paring capabilities and the layout of the slices.
11480 /// \pre \p GlobalLSCost should account for at least as many loads as
11481 /// there is in the slices in \p LoadedSlices.
11482 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11483                                  LoadedSlice::Cost &GlobalLSCost) {
11484   unsigned NumberOfSlices = LoadedSlices.size();
11485   // If there is less than 2 elements, no pairing is possible.
11486   if (NumberOfSlices < 2)
11487     return;
11488 
11489   // Sort the slices so that elements that are likely to be next to each
11490   // other in memory are next to each other in the list.
11491   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
11492             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
11493     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
11494     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
11495   });
11496   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
11497   // First (resp. Second) is the first (resp. Second) potentially candidate
11498   // to be placed in a paired load.
11499   const LoadedSlice *First = nullptr;
11500   const LoadedSlice *Second = nullptr;
11501   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
11502                 // Set the beginning of the pair.
11503                                                            First = Second) {
11504 
11505     Second = &LoadedSlices[CurrSlice];
11506 
11507     // If First is NULL, it means we start a new pair.
11508     // Get to the next slice.
11509     if (!First)
11510       continue;
11511 
11512     EVT LoadedType = First->getLoadedType();
11513 
11514     // If the types of the slices are different, we cannot pair them.
11515     if (LoadedType != Second->getLoadedType())
11516       continue;
11517 
11518     // Check if the target supplies paired loads for this type.
11519     unsigned RequiredAlignment = 0;
11520     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
11521       // move to the next pair, this type is hopeless.
11522       Second = nullptr;
11523       continue;
11524     }
11525     // Check if we meet the alignment requirement.
11526     if (RequiredAlignment > First->getAlignment())
11527       continue;
11528 
11529     // Check that both loads are next to each other in memory.
11530     if (!areSlicesNextToEachOther(*First, *Second))
11531       continue;
11532 
11533     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
11534     --GlobalLSCost.Loads;
11535     // Move to the next pair.
11536     Second = nullptr;
11537   }
11538 }
11539 
11540 /// \brief Check the profitability of all involved LoadedSlice.
11541 /// Currently, it is considered profitable if there is exactly two
11542 /// involved slices (1) which are (2) next to each other in memory, and
11543 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
11544 ///
11545 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
11546 /// the elements themselves.
11547 ///
11548 /// FIXME: When the cost model will be mature enough, we can relax
11549 /// constraints (1) and (2).
11550 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
11551                                 const APInt &UsedBits, bool ForCodeSize) {
11552   unsigned NumberOfSlices = LoadedSlices.size();
11553   if (StressLoadSlicing)
11554     return NumberOfSlices > 1;
11555 
11556   // Check (1).
11557   if (NumberOfSlices != 2)
11558     return false;
11559 
11560   // Check (2).
11561   if (!areUsedBitsDense(UsedBits))
11562     return false;
11563 
11564   // Check (3).
11565   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
11566   // The original code has one big load.
11567   OrigCost.Loads = 1;
11568   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
11569     const LoadedSlice &LS = LoadedSlices[CurrSlice];
11570     // Accumulate the cost of all the slices.
11571     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
11572     GlobalSlicingCost += SliceCost;
11573 
11574     // Account as cost in the original configuration the gain obtained
11575     // with the current slices.
11576     OrigCost.addSliceGain(LS);
11577   }
11578 
11579   // If the target supports paired load, adjust the cost accordingly.
11580   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
11581   return OrigCost > GlobalSlicingCost;
11582 }
11583 
11584 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
11585 /// operations, split it in the various pieces being extracted.
11586 ///
11587 /// This sort of thing is introduced by SROA.
11588 /// This slicing takes care not to insert overlapping loads.
11589 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
11590 bool DAGCombiner::SliceUpLoad(SDNode *N) {
11591   if (Level < AfterLegalizeDAG)
11592     return false;
11593 
11594   LoadSDNode *LD = cast<LoadSDNode>(N);
11595   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
11596       !LD->getValueType(0).isInteger())
11597     return false;
11598 
11599   // Keep track of already used bits to detect overlapping values.
11600   // In that case, we will just abort the transformation.
11601   APInt UsedBits(LD->getValueSizeInBits(0), 0);
11602 
11603   SmallVector<LoadedSlice, 4> LoadedSlices;
11604 
11605   // Check if this load is used as several smaller chunks of bits.
11606   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
11607   // of computation for each trunc.
11608   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
11609        UI != UIEnd; ++UI) {
11610     // Skip the uses of the chain.
11611     if (UI.getUse().getResNo() != 0)
11612       continue;
11613 
11614     SDNode *User = *UI;
11615     unsigned Shift = 0;
11616 
11617     // Check if this is a trunc(lshr).
11618     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
11619         isa<ConstantSDNode>(User->getOperand(1))) {
11620       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
11621       User = *User->use_begin();
11622     }
11623 
11624     // At this point, User is a Truncate, iff we encountered, trunc or
11625     // trunc(lshr).
11626     if (User->getOpcode() != ISD::TRUNCATE)
11627       return false;
11628 
11629     // The width of the type must be a power of 2 and greater than 8-bits.
11630     // Otherwise the load cannot be represented in LLVM IR.
11631     // Moreover, if we shifted with a non-8-bits multiple, the slice
11632     // will be across several bytes. We do not support that.
11633     unsigned Width = User->getValueSizeInBits(0);
11634     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
11635       return 0;
11636 
11637     // Build the slice for this chain of computations.
11638     LoadedSlice LS(User, LD, Shift, &DAG);
11639     APInt CurrentUsedBits = LS.getUsedBits();
11640 
11641     // Check if this slice overlaps with another.
11642     if ((CurrentUsedBits & UsedBits) != 0)
11643       return false;
11644     // Update the bits used globally.
11645     UsedBits |= CurrentUsedBits;
11646 
11647     // Check if the new slice would be legal.
11648     if (!LS.isLegal())
11649       return false;
11650 
11651     // Record the slice.
11652     LoadedSlices.push_back(LS);
11653   }
11654 
11655   // Abort slicing if it does not seem to be profitable.
11656   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
11657     return false;
11658 
11659   ++SlicedLoads;
11660 
11661   // Rewrite each chain to use an independent load.
11662   // By construction, each chain can be represented by a unique load.
11663 
11664   // Prepare the argument for the new token factor for all the slices.
11665   SmallVector<SDValue, 8> ArgChains;
11666   for (SmallVectorImpl<LoadedSlice>::const_iterator
11667            LSIt = LoadedSlices.begin(),
11668            LSItEnd = LoadedSlices.end();
11669        LSIt != LSItEnd; ++LSIt) {
11670     SDValue SliceInst = LSIt->loadSlice();
11671     CombineTo(LSIt->Inst, SliceInst, true);
11672     if (SliceInst.getOpcode() != ISD::LOAD)
11673       SliceInst = SliceInst.getOperand(0);
11674     assert(SliceInst->getOpcode() == ISD::LOAD &&
11675            "It takes more than a zext to get to the loaded slice!!");
11676     ArgChains.push_back(SliceInst.getValue(1));
11677   }
11678 
11679   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
11680                               ArgChains);
11681   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11682   AddToWorklist(Chain.getNode());
11683   return true;
11684 }
11685 
11686 /// Check to see if V is (and load (ptr), imm), where the load is having
11687 /// specific bytes cleared out.  If so, return the byte size being masked out
11688 /// and the shift amount.
11689 static std::pair<unsigned, unsigned>
11690 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
11691   std::pair<unsigned, unsigned> Result(0, 0);
11692 
11693   // Check for the structure we're looking for.
11694   if (V->getOpcode() != ISD::AND ||
11695       !isa<ConstantSDNode>(V->getOperand(1)) ||
11696       !ISD::isNormalLoad(V->getOperand(0).getNode()))
11697     return Result;
11698 
11699   // Check the chain and pointer.
11700   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
11701   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
11702 
11703   // The store should be chained directly to the load or be an operand of a
11704   // tokenfactor.
11705   if (LD == Chain.getNode())
11706     ; // ok.
11707   else if (Chain->getOpcode() != ISD::TokenFactor)
11708     return Result; // Fail.
11709   else {
11710     bool isOk = false;
11711     for (const SDValue &ChainOp : Chain->op_values())
11712       if (ChainOp.getNode() == LD) {
11713         isOk = true;
11714         break;
11715       }
11716     if (!isOk) return Result;
11717   }
11718 
11719   // This only handles simple types.
11720   if (V.getValueType() != MVT::i16 &&
11721       V.getValueType() != MVT::i32 &&
11722       V.getValueType() != MVT::i64)
11723     return Result;
11724 
11725   // Check the constant mask.  Invert it so that the bits being masked out are
11726   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
11727   // follow the sign bit for uniformity.
11728   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
11729   unsigned NotMaskLZ = countLeadingZeros(NotMask);
11730   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
11731   unsigned NotMaskTZ = countTrailingZeros(NotMask);
11732   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
11733   if (NotMaskLZ == 64) return Result;  // All zero mask.
11734 
11735   // See if we have a continuous run of bits.  If so, we have 0*1+0*
11736   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
11737     return Result;
11738 
11739   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
11740   if (V.getValueType() != MVT::i64 && NotMaskLZ)
11741     NotMaskLZ -= 64-V.getValueSizeInBits();
11742 
11743   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
11744   switch (MaskedBytes) {
11745   case 1:
11746   case 2:
11747   case 4: break;
11748   default: return Result; // All one mask, or 5-byte mask.
11749   }
11750 
11751   // Verify that the first bit starts at a multiple of mask so that the access
11752   // is aligned the same as the access width.
11753   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
11754 
11755   Result.first = MaskedBytes;
11756   Result.second = NotMaskTZ/8;
11757   return Result;
11758 }
11759 
11760 
11761 /// Check to see if IVal is something that provides a value as specified by
11762 /// MaskInfo. If so, replace the specified store with a narrower store of
11763 /// truncated IVal.
11764 static SDNode *
11765 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
11766                                 SDValue IVal, StoreSDNode *St,
11767                                 DAGCombiner *DC) {
11768   unsigned NumBytes = MaskInfo.first;
11769   unsigned ByteShift = MaskInfo.second;
11770   SelectionDAG &DAG = DC->getDAG();
11771 
11772   // Check to see if IVal is all zeros in the part being masked in by the 'or'
11773   // that uses this.  If not, this is not a replacement.
11774   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
11775                                   ByteShift*8, (ByteShift+NumBytes)*8);
11776   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
11777 
11778   // Check that it is legal on the target to do this.  It is legal if the new
11779   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
11780   // legalization.
11781   MVT VT = MVT::getIntegerVT(NumBytes*8);
11782   if (!DC->isTypeLegal(VT))
11783     return nullptr;
11784 
11785   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
11786   // shifted by ByteShift and truncated down to NumBytes.
11787   if (ByteShift) {
11788     SDLoc DL(IVal);
11789     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
11790                        DAG.getConstant(ByteShift*8, DL,
11791                                     DC->getShiftAmountTy(IVal.getValueType())));
11792   }
11793 
11794   // Figure out the offset for the store and the alignment of the access.
11795   unsigned StOffset;
11796   unsigned NewAlign = St->getAlignment();
11797 
11798   if (DAG.getDataLayout().isLittleEndian())
11799     StOffset = ByteShift;
11800   else
11801     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
11802 
11803   SDValue Ptr = St->getBasePtr();
11804   if (StOffset) {
11805     SDLoc DL(IVal);
11806     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
11807                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
11808     NewAlign = MinAlign(NewAlign, StOffset);
11809   }
11810 
11811   // Truncate down to the new size.
11812   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
11813 
11814   ++OpsNarrowed;
11815   return DAG
11816       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
11817                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
11818       .getNode();
11819 }
11820 
11821 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
///
/// \param N the store to examine; it is cast to StoreSDNode unconditionally.
/// \returns the replacement (narrower) store on success, or a null SDValue
/// when no transformation was performed.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  // Volatile stores are never narrowed.
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain (non-truncating) scalar stores whose value has no other user
  // are considered.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    // A non-zero byte count means operand 0 matched the masked-load pattern.
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // From here on only "op (load P), constant" with op in {or, xor, and} is
  // handled.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The load must be a normal load with a single value user, and the store
  // must be chained directly to it.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    // Load and store must use the same pointer and the same address space.
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For 'and' the bits that can change are the zero bits of the immediate,
    // so invert it; afterwards set bits in Imm mark the affected bits for all
    // three opcodes.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // Nothing affected, or everything affected: narrowing cannot help.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // ShAmt is the index of the lowest affected bit, MSB that of the highest.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    // Initial candidate width derived from the span of affected bits.
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    // No acceptable type narrower than the original was found.
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    // (The expression below rounds ShAmt down to a multiple of NewBW.)
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // Mask covers the NewBW-wide window starting at ShAmt, clamped to the
    // width of the original value.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Proceed only if every affected bit lies inside that single window.
    if ((Imm & Mask) == Imm) {
      // Immediate for the narrow operation; for 'and' undo the inversion
      // applied above so that it is a mask again.
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Give up if the alignment known at the offset address is below the ABI
      // alignment of the narrow type.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence at Ptr + PtrOff. The new
      // load reuses the old load's input chain, memory-operand flags and AA
      // info.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      // NewST is created on Chain, which is the old load's chain result (see
      // the condition at the top of this block).
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      // NOTE(review): WorklistRemover presumably keeps the combiner worklist
      // in sync with nodes deleted during the replacement below -- confirm
      // against its definition.
      WorklistRemover DeadNodes(*this);
      // Redirect all users of the old load's chain result to the new load's
      // chain result.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
11947 
11948 /// For a given floating point load / store pair, if the load value isn't used
11949 /// by any other operations, then consider transforming the pair to integer
11950 /// load / store operations if the target deems the transformation profitable.
11951 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
11952   StoreSDNode *ST  = cast<StoreSDNode>(N);
11953   SDValue Chain = ST->getChain();
11954   SDValue Value = ST->getValue();
11955   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
11956       Value.hasOneUse() &&
11957       Chain == SDValue(Value.getNode(), 1)) {
11958     LoadSDNode *LD = cast<LoadSDNode>(Value);
11959     EVT VT = LD->getMemoryVT();
11960     if (!VT.isFloatingPoint() ||
11961         VT != ST->getMemoryVT() ||
11962         LD->isNonTemporal() ||
11963         ST->isNonTemporal() ||
11964         LD->getPointerInfo().getAddrSpace() != 0 ||
11965         ST->getPointerInfo().getAddrSpace() != 0)
11966       return SDValue();
11967 
11968     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11969     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
11970         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
11971         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
11972         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
11973       return SDValue();
11974 
11975     unsigned LDAlign = LD->getAlignment();
11976     unsigned STAlign = ST->getAlignment();
11977     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
11978     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
11979     if (LDAlign < ABIAlign || STAlign < ABIAlign)
11980       return SDValue();
11981 
11982     SDValue NewLD =
11983         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
11984                     LD->getPointerInfo(), LDAlign);
11985 
11986     SDValue NewST =
11987         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
11988                      ST->getPointerInfo(), STAlign);
11989 
11990     AddToWorklist(NewLD.getNode());
11991     AddToWorklist(NewST.getNode());
11992     WorklistRemover DeadNodes(*this);
11993     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
11994     ++LdStFP2Int;
11995     return NewST;
11996   }
11997 
11998   return SDValue();
11999 }
12000 
12001 // This is a helper function for visitMUL to check the profitability
12002 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12003 // MulNode is the original multiply, AddNode is (add x, c1),
12004 // and ConstNode is c2.
12005 //
12006 // If the (add x, c1) has multiple uses, we could increase
12007 // the number of adds if we make this transformation.
12008 // It would only be worth doing this if we can remove a
12009 // multiply in the process. Check for that here.
12010 // To illustrate:
12011 //     (A + c1) * c3
12012 //     (A + c2) * c3
12013 // We're checking for cases where we have common "c3 * A" expressions.
12014 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12015                                               SDValue &AddNode,
12016                                               SDValue &ConstNode) {
12017   APInt Val;
12018 
12019   // If the add only has one use, this would be OK to do.
12020   if (AddNode.getNode()->hasOneUse())
12021     return true;
12022 
12023   // Walk all the users of the constant with which we're multiplying.
12024   for (SDNode *Use : ConstNode->uses()) {
12025 
12026     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12027       continue;
12028 
12029     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12030       SDNode *OtherOp;
12031       SDNode *MulVar = AddNode.getOperand(0).getNode();
12032 
12033       // OtherOp is what we're multiplying against the constant.
12034       if (Use->getOperand(0) == ConstNode)
12035         OtherOp = Use->getOperand(1).getNode();
12036       else
12037         OtherOp = Use->getOperand(0).getNode();
12038 
12039       // Check to see if multiply is with the same operand of our "add".
12040       //
12041       //     ConstNode  = CONST
12042       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12043       //     ...
12044       //     AddNode  = (A + c1)  <-- MulVar is A.
12045       //         = AddNode * ConstNode   <-- current visiting instruction.
12046       //
12047       // If we make this transformation, we will have a common
12048       // multiply (ConstNode * A) that we can save.
12049       if (OtherOp == MulVar)
12050         return true;
12051 
12052       // Now check to see if a future expansion will give us a common
12053       // multiply.
12054       //
12055       //     ConstNode  = CONST
12056       //     AddNode    = (A + c1)
12057       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12058       //     ...
12059       //     OtherOp = (A + c2)
12060       //     Use     = OtherOp * ConstNode <-- visiting Use.
12061       //
12062       // If we make this transformation, we will have a common
12063       // multiply (CONST * A) after we also do the same transformation
12064       // to the "t2" instruction.
12065       if (OtherOp->getOpcode() == ISD::ADD &&
12066           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12067           OtherOp->getOperand(0).getNode() == MulVar)
12068         return true;
12069     }
12070   }
12071 
12072   // Didn't find a case where this would be profitable.
12073   return false;
12074 }
12075 
12076 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
12077                   SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
12078                   unsigned NumStores, bool IsConstantSrc, bool UseVector) {
12079   // Make sure we have something to merge.
12080   if (NumStores < 2)
12081     return false;
12082 
12083   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12084 
12085   // The latest Node in the DAG.
12086   SDLoc DL(StoreNodes[0].MemNode);
12087 
12088   SDValue StoredVal;
12089   if (UseVector) {
12090     bool IsVec = MemVT.isVector();
12091     unsigned Elts = NumStores;
12092     if (IsVec) {
12093       // When merging vector stores, get the total number of elements.
12094       Elts *= MemVT.getVectorNumElements();
12095     }
12096     // Get the type for the merged vector store.
12097     EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12098     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
12099 
12100     if (IsConstantSrc) {
12101       SmallVector<SDValue, 8> BuildVector;
12102       for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
12103         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
12104         SDValue Val = St->getValue();
12105         if (MemVT.getScalarType().isInteger())
12106           if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
12107             Val = DAG.getConstant(
12108                 (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
12109                 SDLoc(CFP), MemVT);
12110         BuildVector.push_back(Val);
12111       }
12112       StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
12113     } else {
12114       SmallVector<SDValue, 8> Ops;
12115       for (unsigned i = 0; i < NumStores; ++i) {
12116         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12117         SDValue Val = St->getValue();
12118         // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
12119         if (Val.getValueType() != MemVT)
12120           return false;
12121         Ops.push_back(Val);
12122       }
12123 
12124       // Build the extracted vector elements back into a vector.
12125       StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
12126                               DL, Ty, Ops);    }
12127   } else {
12128     // We should always use a vector store when merging extracted vector
12129     // elements, so this path implies a store of constants.
12130     assert(IsConstantSrc && "Merged vector elements should use vector store");
12131 
12132     unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
12133     APInt StoreInt(SizeInBits, 0);
12134 
12135     // Construct a single integer constant which is made of the smaller
12136     // constant inputs.
12137     bool IsLE = DAG.getDataLayout().isLittleEndian();
12138     for (unsigned i = 0; i < NumStores; ++i) {
12139       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
12140       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
12141 
12142       SDValue Val = St->getValue();
12143       StoreInt <<= ElementSizeBytes * 8;
12144       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
12145         StoreInt |= C->getAPIntValue().zext(SizeInBits);
12146       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
12147         StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
12148       } else {
12149         llvm_unreachable("Invalid constant element type");
12150       }
12151     }
12152 
12153     // Create the new Load and Store operations.
12154     EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
12155     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
12156   }
12157 
12158   SmallVector<SDValue, 8> Chains;
12159 
12160   // Gather all Chains we're inheriting. As generally all chains are
12161   // equal, do minor check to remove obvious redundancies.
12162   Chains.push_back(StoreNodes[0].MemNode->getChain());
12163   for (unsigned i = 1; i < NumStores; ++i)
12164     if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain())
12165       Chains.push_back(StoreNodes[i].MemNode->getChain());
12166 
12167   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12168   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12169   SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
12170                                   FirstInChain->getBasePtr(),
12171                                   FirstInChain->getPointerInfo(),
12172                                   FirstInChain->getAlignment());
12173 
12174   // Replace all merged stores with the new store.
12175   for (unsigned i = 0; i < NumStores; ++i)
12176     CombineTo(StoreNodes[i].MemNode, NewStore);
12177 
12178   AddToWorklist(NewChain.getNode());
12179   return true;
12180 }
12181 
12182 void DAGCombiner::getStoreMergeCandidates(
12183     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
12184   // This holds the base pointer, index, and the offset in bytes from the base
12185   // pointer.
12186   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
12187   EVT MemVT = St->getMemoryVT();
12188 
12189   // We must have a base and an offset.
12190   if (!BasePtr.Base.getNode())
12191     return;
12192 
12193   // Do not handle stores to undef base pointers.
12194   if (BasePtr.Base.isUndef())
12195     return;
12196 
12197   // We looking for a root node which is an ancestor to all mergable
12198   // stores. We search up through a load, to our root and then down
12199   // through all children. For instance we will find Store{1,2,3} if
12200   // St is Store1, Store2. or Store3 where the root is not a load
12201   // which always true for nonvolatile ops. TODO: Expand
12202   // the search to find all valid candidates through multiple layers of loads.
12203   //
12204   // Root
12205   // |-------|-------|
12206   // Load    Load    Store3
12207   // |       |
12208   // Store1   Store2
12209   //
12210   // FIXME: We should be able to climb and
12211   // descend TokenFactors to find candidates as well.
12212 
12213   SDNode *RootNode = (St->getChain()).getNode();
12214 
12215   // Set of Parents of Candidates
12216   std::set<SDNode *> CandidateParents;
12217 
12218   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
12219     RootNode = Ldn->getChain().getNode();
12220     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
12221       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
12222         CandidateParents.insert(*I);
12223   } else
12224     CandidateParents.insert(RootNode);
12225 
12226   bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
12227   bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
12228                        isa<ConstantFPSDNode>(St->getValue());
12229   bool IsExtractVecSrc =
12230       (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12231        St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12232   auto CorrectValueKind = [&](StoreSDNode *Other) -> bool {
12233     if (IsLoadSrc)
12234       return isa<LoadSDNode>(Other->getValue());
12235     if (IsConstantSrc)
12236       return (isa<ConstantSDNode>(Other->getValue()) ||
12237               isa<ConstantFPSDNode>(Other->getValue()));
12238     if (IsExtractVecSrc)
12239       return (Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12240               Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
12241     return false;
12242   };
12243 
12244   // check all parents of mergable children
12245   for (auto P = CandidateParents.begin(); P != CandidateParents.end(); ++P)
12246     for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E; ++I)
12247       if (I.getOperandNo() == 0)
12248         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
12249           if (OtherST->isVolatile() || OtherST->isIndexed())
12250             continue;
12251           // We can merge constant floats to equivalent integers
12252           if (OtherST->getMemoryVT() != MemVT)
12253             if (!(MemVT.isInteger() && MemVT.bitsEq(OtherST->getMemoryVT()) &&
12254                   isa<ConstantFPSDNode>(OtherST->getValue())))
12255               continue;
12256           BaseIndexOffset Ptr =
12257               BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
12258           if (Ptr.equalBaseIndex(BasePtr) && CorrectValueKind(OtherST))
12259             StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
12260         }
12261 }
12262 
12263 // We need to check that merging these stores does not cause a loop
12264 // in the DAG. Any store candidate may depend on another candidate
12265 // indirectly through its operand (we already consider dependencies
12266 // through the chain). Check in parallel by searching up from
12267 // non-chain operands of candidates.
12268 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
12269     SmallVectorImpl<MemOpLink> &StoreNodes) {
12270   SmallPtrSet<const SDNode *, 16> Visited;
12271   SmallVector<const SDNode *, 8> Worklist;
12272   // search ops of store candidates
12273   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
12274     SDNode *n = StoreNodes[i].MemNode;
12275     // Potential loops may happen only through non-chain operands
12276     for (unsigned j = 1; j < n->getNumOperands(); ++j)
12277       Worklist.push_back(n->getOperand(j).getNode());
12278   }
12279   // search through DAG. We can stop early if we find a storenode
12280   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
12281     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
12282       return false;
12283   }
12284   return true;
12285 }
12286 
12287 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
12288   if (OptLevel == CodeGenOpt::None)
12289     return false;
12290 
12291   EVT MemVT = St->getMemoryVT();
12292   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
12293 
12294   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
12295     return false;
12296 
12297   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
12298       Attribute::NoImplicitFloat);
12299 
12300   // This function cannot currently deal with non-byte-sized memory sizes.
12301   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
12302     return false;
12303 
12304   if (!MemVT.isSimple())
12305     return false;
12306 
12307   // Perform an early exit check. Do not bother looking at stored values that
12308   // are not constants, loads, or extracted vector elements.
12309   SDValue StoredVal = St->getValue();
12310   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
12311   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
12312                        isa<ConstantFPSDNode>(StoredVal);
12313   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
12314                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
12315 
12316   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
12317     return false;
12318 
12319   // Don't merge vectors into wider vectors if the source data comes from loads.
12320   // TODO: This restriction can be lifted by using logic similar to the
12321   // ExtractVecSrc case.
12322   if (MemVT.isVector() && IsLoadSrc)
12323     return false;
12324 
12325   SmallVector<MemOpLink, 8> StoreNodes;
12326   // Find potential store merge candidates by searching through chain sub-DAG
12327   getStoreMergeCandidates(St, StoreNodes);
12328 
12329   // Check if there is anything to merge.
12330   if (StoreNodes.size() < 2)
12331     return false;
12332 
12333   // Check that we can merge these candidates without causing a cycle
12334   if (!checkMergeStoreCandidatesForDependencies(StoreNodes))
12335     return false;
12336 
12337   // Sort the memory operands according to their distance from the
12338   // base pointer.
12339   std::sort(StoreNodes.begin(), StoreNodes.end(),
12340             [](MemOpLink LHS, MemOpLink RHS) {
12341               return LHS.OffsetFromBase < RHS.OffsetFromBase;
12342             });
12343 
12344   // Scan the memory operations on the chain and find the first non-consecutive
12345   // store memory address.
12346   unsigned NumConsecutiveStores = 0;
12347   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
12348 
12349   // Check that the addresses are consecutive starting from the second
12350   // element in the list of stores.
12351   for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
12352     int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
12353     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12354       break;
12355     NumConsecutiveStores = i + 1;
12356   }
12357 
12358   if (NumConsecutiveStores < 2)
12359     return false;
12360 
12361   // The node with the lowest store address.
12362   LLVMContext &Context = *DAG.getContext();
12363   const DataLayout &DL = DAG.getDataLayout();
12364 
12365   // Store the constants into memory as one consecutive store.
12366   if (IsConstantSrc) {
12367     bool RV = false;
12368     while (NumConsecutiveStores > 1) {
12369       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12370       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12371       unsigned FirstStoreAlign = FirstInChain->getAlignment();
12372       unsigned LastLegalType = 0;
12373       unsigned LastLegalVectorType = 0;
12374       bool NonZero = false;
12375       for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12376         StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
12377         SDValue StoredVal = ST->getValue();
12378 
12379         if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
12380           NonZero |= !C->isNullValue();
12381         } else if (ConstantFPSDNode *C =
12382                        dyn_cast<ConstantFPSDNode>(StoredVal)) {
12383           NonZero |= !C->getConstantFPValue()->isNullValue();
12384         } else {
12385           // Non-constant.
12386           break;
12387         }
12388 
12389         // Find a legal type for the constant store.
12390         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
12391         EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12392         bool IsFast = false;
12393         if (TLI.isTypeLegal(StoreTy) &&
12394             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12395                                    FirstStoreAlign, &IsFast) &&
12396             IsFast) {
12397           LastLegalType = i + 1;
12398           // Or check whether a truncstore is legal.
12399         } else if (TLI.getTypeAction(Context, StoreTy) ==
12400                    TargetLowering::TypePromoteInteger) {
12401           EVT LegalizedStoredValueTy =
12402               TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
12403           if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12404               TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12405                                      FirstStoreAS, FirstStoreAlign, &IsFast) &&
12406               IsFast) {
12407             LastLegalType = i + 1;
12408           }
12409         }
12410 
12411         // We only use vectors if the constant is known to be zero or the target
12412         // allows it and the function is not marked with the noimplicitfloat
12413         // attribute.
12414         if ((!NonZero ||
12415              TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
12416             !NoVectors) {
12417           // Find a legal type for the vector store.
12418           EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
12419           if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
12420               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12421                                      FirstStoreAlign, &IsFast) &&
12422               IsFast)
12423             LastLegalVectorType = i + 1;
12424         }
12425       }
12426 
12427       // Check if we found a legal integer type that creates a meaningful merge.
12428       if (LastLegalType < 2 && LastLegalVectorType < 2)
12429         break;
12430 
12431       bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
12432       unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
12433 
12434       bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
12435                                                     true, UseVector);
12436       if (!Merged)
12437         break;
12438       // Remove merged stores for next iteration.
12439       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
12440       RV = true;
12441       NumConsecutiveStores -= NumElem;
12442     }
12443     return RV;
12444   }
12445 
12446   // When extracting multiple vector elements, try to store them
12447   // in one vector store rather than a sequence of scalar stores.
12448   if (IsExtractVecSrc) {
12449     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12450     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12451     unsigned FirstStoreAlign = FirstInChain->getAlignment();
12452     unsigned NumStoresToMerge = 0;
12453     bool IsVec = MemVT.isVector();
12454     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12455       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
12456       unsigned StoreValOpcode = St->getValue().getOpcode();
12457       // This restriction could be loosened.
12458       // Bail out if any stored values are not elements extracted from a vector.
12459       // It should be possible to handle mixed sources, but load sources need
12460       // more careful handling (see the block of code below that handles
12461       // consecutive loads).
12462       if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
12463           StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
12464         return false;
12465 
12466       // Find a legal type for the vector store.
12467       unsigned Elts = i + 1;
12468       if (IsVec) {
12469         // When merging vector stores, get the total number of elements.
12470         Elts *= MemVT.getVectorNumElements();
12471       }
12472       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
12473       bool IsFast;
12474       if (TLI.isTypeLegal(Ty) &&
12475           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
12476                                  FirstStoreAlign, &IsFast) && IsFast)
12477         NumStoresToMerge = i + 1;
12478     }
12479 
12480     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
12481                                            false, true);
12482   }
12483 
12484   // Below we handle the case of multiple consecutive stores that
12485   // come from multiple consecutive loads. We merge them into a single
12486   // wide load and a single wide store.
12487 
12488   // Look for load nodes which are used by the stored values.
12489   SmallVector<MemOpLink, 8> LoadNodes;
12490 
12491   // Find acceptable loads. Loads need to have the same chain (token factor),
12492   // must not be zext, volatile, indexed, and they must be consecutive.
12493   BaseIndexOffset LdBasePtr;
12494   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
12495     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
12496     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
12497     if (!Ld) break;
12498 
12499     // Loads must only have one use.
12500     if (!Ld->hasNUsesOfValue(1, 0))
12501       break;
12502 
12503     // The memory operands must not be volatile.
12504     if (Ld->isVolatile() || Ld->isIndexed())
12505       break;
12506 
12507     // We do not accept ext loads.
12508     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
12509       break;
12510 
12511     // The stored memory type must be the same.
12512     if (Ld->getMemoryVT() != MemVT)
12513       break;
12514 
12515     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
12516     // If this is not the first ptr that we check.
12517     if (LdBasePtr.Base.getNode()) {
12518       // The base ptr must be the same.
12519       if (!LdPtr.equalBaseIndex(LdBasePtr))
12520         break;
12521     } else {
12522       // Check that all other base pointers are the same as this one.
12523       LdBasePtr = LdPtr;
12524     }
12525 
12526     // We found a potential memory operand to merge.
12527     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
12528   }
12529 
12530   if (LoadNodes.size() < 2)
12531     return false;
12532 
12533   // If we have load/store pair instructions and we only have two values,
12534   // don't bother.
12535   unsigned RequiredAlignment;
12536   if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
12537       St->getAlignment() >= RequiredAlignment)
12538     return false;
12539   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
12540   unsigned FirstStoreAS = FirstInChain->getAddressSpace();
12541   unsigned FirstStoreAlign = FirstInChain->getAlignment();
12542   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
12543   unsigned FirstLoadAS = FirstLoad->getAddressSpace();
12544   unsigned FirstLoadAlign = FirstLoad->getAlignment();
12545 
12546   // Scan the memory operations on the chain and find the first non-consecutive
12547   // load memory address. These variables hold the index in the store node
12548   // array.
12549   unsigned LastConsecutiveLoad = 0;
12550   // This variable refers to the size and not index in the array.
12551   unsigned LastLegalVectorType = 0;
12552   unsigned LastLegalIntegerType = 0;
12553   StartAddress = LoadNodes[0].OffsetFromBase;
12554   SDValue FirstChain = FirstLoad->getChain();
12555   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
12556     // All loads must share the same chain.
12557     if (LoadNodes[i].MemNode->getChain() != FirstChain)
12558       break;
12559 
12560     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
12561     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
12562       break;
12563     LastConsecutiveLoad = i;
12564     // Find a legal type for the vector store.
12565     EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
12566     bool IsFastSt, IsFastLd;
12567     if (TLI.isTypeLegal(StoreTy) &&
12568         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12569                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
12570         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12571                                FirstLoadAlign, &IsFastLd) && IsFastLd) {
12572       LastLegalVectorType = i + 1;
12573     }
12574 
12575     // Find a legal type for the integer store.
12576     unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
12577     StoreTy = EVT::getIntegerVT(Context, SizeInBits);
12578     if (TLI.isTypeLegal(StoreTy) &&
12579         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12580                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
12581         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
12582                                FirstLoadAlign, &IsFastLd) && IsFastLd)
12583       LastLegalIntegerType = i + 1;
12584     // Or check whether a truncstore and extload is legal.
12585     else if (TLI.getTypeAction(Context, StoreTy) ==
12586              TargetLowering::TypePromoteInteger) {
12587       EVT LegalizedStoredValueTy =
12588         TLI.getTypeToTransformTo(Context, StoreTy);
12589       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12590           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12591           TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12592           TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12593           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12594                                  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
12595           IsFastSt &&
12596           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12597                                  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
12598           IsFastLd)
12599         LastLegalIntegerType = i+1;
12600     }
12601   }
12602 
12603   // Only use vector types if the vector type is larger than the integer type.
12604   // If they are the same, use integers.
12605   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
12606   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
12607 
12608   // We add +1 here because the LastXXX variables refer to location while
12609   // the NumElem refers to array/index size.
12610   unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
12611   NumElem = std::min(LastLegalType, NumElem);
12612 
12613   if (NumElem < 2)
12614     return false;
12615 
12616   // Collect the chains from all merged stores. Because the common case
12617   // all chains are the same, check if we match the first Chain.
12618   SmallVector<SDValue, 8> MergeStoreChains;
12619   MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
12620   for (unsigned i = 1; i < NumElem; ++i)
12621     if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain())
12622       MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
12623 
12624   // Find if it is better to use vectors or integers to load and store
12625   // to memory.
12626   EVT JointMemOpVT;
12627   if (UseVectorTy) {
12628     JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
12629   } else {
12630     unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
12631     JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
12632   }
12633 
12634   SDLoc LoadDL(LoadNodes[0].MemNode);
12635   SDLoc StoreDL(StoreNodes[0].MemNode);
12636 
12637   // The merged loads are required to have the same incoming chain, so
12638   // using the first's chain is acceptable.
12639   SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
12640                                 FirstLoad->getBasePtr(),
12641                                 FirstLoad->getPointerInfo(), FirstLoadAlign);
12642 
12643   SDValue NewStoreChain =
12644     DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
12645 
12646   AddToWorklist(NewStoreChain.getNode());
12647 
12648   SDValue NewStore =
12649       DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
12650                    FirstInChain->getPointerInfo(), FirstStoreAlign);
12651 
12652   // Transfer chain users from old loads to the new load.
12653   for (unsigned i = 0; i < NumElem; ++i) {
12654     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
12655     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
12656                                   SDValue(NewLoad.getNode(), 1));
12657   }
12658 
12659   // Replace the all stores with the new store.
12660   for (unsigned i = 0; i < NumElem; ++i)
12661     CombineTo(StoreNodes[i].MemNode, NewStore);
12662   return true;
12663 }
12664 
12665 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
12666   SDLoc SL(ST);
12667   SDValue ReplStore;
12668 
12669   // Replace the chain to avoid dependency.
12670   if (ST->isTruncatingStore()) {
12671     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
12672                                   ST->getBasePtr(), ST->getMemoryVT(),
12673                                   ST->getMemOperand());
12674   } else {
12675     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
12676                              ST->getMemOperand());
12677   }
12678 
12679   // Create token to keep both nodes around.
12680   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
12681                               MVT::Other, ST->getChain(), ReplStore);
12682 
12683   // Make sure the new and old chains are cleaned up.
12684   AddToWorklist(Token.getNode());
12685 
12686   // Don't add users to work list.
12687   return CombineTo(ST, Token, false);
12688 }
12689 
12690 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
12691   SDValue Value = ST->getValue();
12692   if (Value.getOpcode() == ISD::TargetConstantFP)
12693     return SDValue();
12694 
12695   SDLoc DL(ST);
12696 
12697   SDValue Chain = ST->getChain();
12698   SDValue Ptr = ST->getBasePtr();
12699 
12700   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
12701 
12702   // NOTE: If the original store is volatile, this transform must not increase
12703   // the number of stores.  For example, on x86-32 an f64 can be stored in one
12704   // processor operation but an i64 (which is not legal) requires two.  So the
12705   // transform should not be done in this case.
12706 
12707   SDValue Tmp;
12708   switch (CFP->getSimpleValueType(0).SimpleTy) {
12709   default:
12710     llvm_unreachable("Unknown FP type");
12711   case MVT::f16:    // We don't do this for these yet.
12712   case MVT::f80:
12713   case MVT::f128:
12714   case MVT::ppcf128:
12715     return SDValue();
12716   case MVT::f32:
12717     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
12718         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12719       ;
12720       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
12721                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
12722                             MVT::i32);
12723       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
12724     }
12725 
12726     return SDValue();
12727   case MVT::f64:
12728     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
12729          !ST->isVolatile()) ||
12730         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
12731       ;
12732       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
12733                             getZExtValue(), SDLoc(CFP), MVT::i64);
12734       return DAG.getStore(Chain, DL, Tmp,
12735                           Ptr, ST->getMemOperand());
12736     }
12737 
12738     if (!ST->isVolatile() &&
12739         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12740       // Many FP stores are not made apparent until after legalize, e.g. for
12741       // argument passing.  Since this is so common, custom legalize the
12742       // 64-bit integer store into two 32-bit stores.
12743       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
12744       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
12745       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
12746       if (DAG.getDataLayout().isBigEndian())
12747         std::swap(Lo, Hi);
12748 
12749       unsigned Alignment = ST->getAlignment();
12750       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12751       AAMDNodes AAInfo = ST->getAAInfo();
12752 
12753       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12754                                  ST->getAlignment(), MMOFlags, AAInfo);
12755       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12756                         DAG.getConstant(4, DL, Ptr.getValueType()));
12757       Alignment = MinAlign(Alignment, 4U);
12758       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
12759                                  ST->getPointerInfo().getWithOffset(4),
12760                                  Alignment, MMOFlags, AAInfo);
12761       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12762                          St0, St1);
12763     }
12764 
12765     return SDValue();
12766   }
12767 }
12768 
12769 SDValue DAGCombiner::visitSTORE(SDNode *N) {
12770   StoreSDNode *ST  = cast<StoreSDNode>(N);
12771   SDValue Chain = ST->getChain();
12772   SDValue Value = ST->getValue();
12773   SDValue Ptr   = ST->getBasePtr();
12774 
12775   // If this is a store of a bit convert, store the input value if the
12776   // resultant store does not need a higher alignment than the original.
12777   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
12778       ST->isUnindexed()) {
12779     EVT SVT = Value.getOperand(0).getValueType();
12780     if (((!LegalOperations && !ST->isVolatile()) ||
12781          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
12782         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
12783       unsigned OrigAlign = ST->getAlignment();
12784       bool Fast = false;
12785       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
12786                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
12787           Fast) {
12788         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12789                             ST->getPointerInfo(), OrigAlign,
12790                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
12791       }
12792     }
12793   }
12794 
12795   // Turn 'store undef, Ptr' -> nothing.
12796   if (Value.isUndef() && ST->isUnindexed())
12797     return Chain;
12798 
12799   // Try to infer better alignment information than the store already has.
12800   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
12801     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12802       if (Align > ST->getAlignment()) {
12803         SDValue NewStore =
12804             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
12805                               ST->getMemoryVT(), Align,
12806                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
12807         if (NewStore.getNode() != N)
12808           return CombineTo(ST, NewStore, true);
12809       }
12810     }
12811   }
12812 
12813   // Try transforming a pair floating point load / store ops to integer
12814   // load / store ops.
12815   if (SDValue NewST = TransformFPLoadStorePair(N))
12816     return NewST;
12817 
12818   if (ST->isUnindexed()) {
12819     // Walk up chain skipping non-aliasing memory nodes, on this store and any
12820     // adjacent stores.
12821     if (findBetterNeighborChains(ST)) {
12822       // replaceStoreChain uses CombineTo, which handled all of the worklist
12823       // manipulation. Return the original node to not do anything else.
12824       return SDValue(ST, 0);
12825     }
12826     Chain = ST->getChain();
12827   }
12828 
12829   // Try transforming N to an indexed store.
12830   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12831     return SDValue(N, 0);
12832 
12833   // FIXME: is there such a thing as a truncating indexed store?
12834   if (ST->isTruncatingStore() && ST->isUnindexed() &&
12835       Value.getValueType().isInteger()) {
12836     // See if we can simplify the input to this truncstore with knowledge that
12837     // only the low bits are being used.  For example:
12838     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
12839     SDValue Shorter = GetDemandedBits(
12840         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12841                                     ST->getMemoryVT().getScalarSizeInBits()));
12842     AddToWorklist(Value.getNode());
12843     if (Shorter.getNode())
12844       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
12845                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
12846 
12847     // Otherwise, see if we can simplify the operation with
12848     // SimplifyDemandedBits, which only works if the value has a single use.
12849     if (SimplifyDemandedBits(
12850             Value,
12851             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12852                                  ST->getMemoryVT().getScalarSizeInBits()))) {
12853       // Re-visit the store if anything changed and the store hasn't been merged
12854       // with another node (N is deleted) SimplifyDemandedBits will add Value's
12855       // node back to the worklist if necessary, but we also need to re-visit
12856       // the Store node itself.
12857       if (N->getOpcode() != ISD::DELETED_NODE)
12858         AddToWorklist(N);
12859       return SDValue(N, 0);
12860     }
12861   }
12862 
12863   // If this is a load followed by a store to the same location, then the store
12864   // is dead/noop.
12865   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
12866     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
12867         ST->isUnindexed() && !ST->isVolatile() &&
12868         // There can't be any side effects between the load and store, such as
12869         // a call or store.
12870         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
12871       // The store is dead, remove it.
12872       return Chain;
12873     }
12874   }
12875 
12876   // If this is a store followed by a store with the same value to the same
12877   // location, then the store is dead/noop.
12878   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
12879     if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
12880         ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
12881         ST1->isUnindexed() && !ST1->isVolatile()) {
12882       // The store is dead, remove it.
12883       return Chain;
12884     }
12885   }
12886 
12887   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
12888   // truncating store.  We can do this even if this is already a truncstore.
12889   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
12890       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
12891       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
12892                             ST->getMemoryVT())) {
12893     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
12894                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
12895   }
12896 
12897   // Only perform this optimization before the types are legal, because we
12898   // don't want to perform this optimization on every DAGCombine invocation.
12899   if (!LegalTypes) {
12900     for (;;) {
12901       // There can be multiple store sequences on the same chain.
12902       // Keep trying to merge store sequences until we are unable to do so
12903       // or until we merge the last store on the chain.
12904       bool Changed = MergeConsecutiveStores(ST);
12905       if (!Changed) break;
12906       // Return N as merge only uses CombineTo and no worklist clean
12907       // up is necessary.
12908       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
12909         return SDValue(N, 0);
12910     }
12911   }
12912 
12913   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
12914   //
12915   // Make sure to do this only after attempting to merge stores in order to
12916   //  avoid changing the types of some subset of stores due to visit order,
12917   //  preventing their merging.
12918   if (isa<ConstantFPSDNode>(ST->getValue())) {
12919     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
12920       return NewSt;
12921   }
12922 
12923   if (SDValue NewSt = splitMergedValStore(ST))
12924     return NewSt;
12925 
12926   return ReduceLoadOpStoreWidth(N);
12927 }
12928 
12929 /// For the instruction sequence of store below, F and I values
12930 /// are bundled together as an i64 value before being stored into memory.
12931 /// Sometimes it is more efficent to generate separate stores for F and I,
12932 /// which can remove the bitwise instructions or sink them to colder places.
12933 ///
12934 ///   (store (or (zext (bitcast F to i32) to i64),
12935 ///              (shl (zext I to i64), 32)), addr)  -->
12936 ///   (store F, addr) and (store I, addr+4)
12937 ///
12938 /// Similarly, splitting for other merged store can also be beneficial, like:
12939 /// For pair of {i32, i32}, i64 store --> two i32 stores.
12940 /// For pair of {i32, i16}, i64 store --> two i32 stores.
12941 /// For pair of {i16, i16}, i32 store --> two i16 stores.
12942 /// For pair of {i16, i8},  i32 store --> two i16 stores.
12943 /// For pair of {i8, i8},   i16 store --> two i8 stores.
12944 ///
12945 /// We allow each target to determine specifically which kind of splitting is
12946 /// supported.
12947 ///
12948 /// The store patterns are commonly seen from the simple code snippet below
12949 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
12950 ///   void goo(const std::pair<int, float> &);
12951 ///   hoo() {
12952 ///     ...
12953 ///     goo(std::make_pair(tmp, ftmp));
12954 ///     ...
12955 ///   }
12956 ///
12957 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
12958   if (OptLevel == CodeGenOpt::None)
12959     return SDValue();
12960 
12961   SDValue Val = ST->getValue();
12962   SDLoc DL(ST);
12963 
12964   // Match OR operand.
12965   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
12966     return SDValue();
12967 
12968   // Match SHL operand and get Lower and Higher parts of Val.
12969   SDValue Op1 = Val.getOperand(0);
12970   SDValue Op2 = Val.getOperand(1);
12971   SDValue Lo, Hi;
12972   if (Op1.getOpcode() != ISD::SHL) {
12973     std::swap(Op1, Op2);
12974     if (Op1.getOpcode() != ISD::SHL)
12975       return SDValue();
12976   }
12977   Lo = Op2;
12978   Hi = Op1.getOperand(0);
12979   if (!Op1.hasOneUse())
12980     return SDValue();
12981 
12982   // Match shift amount to HalfValBitSize.
12983   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
12984   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
12985   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
12986     return SDValue();
12987 
12988   // Lo and Hi are zero-extended from int with size less equal than 32
12989   // to i64.
12990   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
12991       !Lo.getOperand(0).getValueType().isScalarInteger() ||
12992       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
12993       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
12994       !Hi.getOperand(0).getValueType().isScalarInteger() ||
12995       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
12996     return SDValue();
12997 
12998   // Use the EVT of low and high parts before bitcast as the input
12999   // of target query.
13000   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13001                   ? Lo.getOperand(0).getValueType()
13002                   : Lo.getValueType();
13003   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13004                    ? Hi.getOperand(0).getValueType()
13005                    : Hi.getValueType();
13006   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13007     return SDValue();
13008 
13009   // Start to split store.
13010   unsigned Alignment = ST->getAlignment();
13011   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13012   AAMDNodes AAInfo = ST->getAAInfo();
13013 
13014   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13015   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13016   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13017   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13018 
13019   SDValue Chain = ST->getChain();
13020   SDValue Ptr = ST->getBasePtr();
13021   // Lower value store.
13022   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13023                              ST->getAlignment(), MMOFlags, AAInfo);
13024   Ptr =
13025       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13026                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13027   // Higher value store.
13028   SDValue St1 =
13029       DAG.getStore(St0, DL, Hi, Ptr,
13030                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
13031                    Alignment / 2, MMOFlags, AAInfo);
13032   return St1;
13033 }
13034 
13035 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
13036   SDValue InVec = N->getOperand(0);
13037   SDValue InVal = N->getOperand(1);
13038   SDValue EltNo = N->getOperand(2);
13039   SDLoc DL(N);
13040 
13041   // If the inserted element is an UNDEF, just use the input vector.
13042   if (InVal.isUndef())
13043     return InVec;
13044 
13045   EVT VT = InVec.getValueType();
13046 
13047   // Check that we know which element is being inserted
13048   if (!isa<ConstantSDNode>(EltNo))
13049     return SDValue();
13050   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13051 
13052   // Canonicalize insert_vector_elt dag nodes.
13053   // Example:
13054   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
13055   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
13056   //
13057   // Do this only if the child insert_vector node has one use; also
13058   // do this only if indices are both constants and Idx1 < Idx0.
13059   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
13060       && isa<ConstantSDNode>(InVec.getOperand(2))) {
13061     unsigned OtherElt =
13062       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
13063     if (Elt < OtherElt) {
13064       // Swap nodes.
13065       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
13066                                   InVec.getOperand(0), InVal, EltNo);
13067       AddToWorklist(NewOp.getNode());
13068       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
13069                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
13070     }
13071   }
13072 
13073   // If we can't generate a legal BUILD_VECTOR, exit
13074   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
13075     return SDValue();
13076 
13077   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
13078   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
13079   // vector elements.
13080   SmallVector<SDValue, 8> Ops;
13081   // Do not combine these two vectors if the output vector will not replace
13082   // the input vector.
13083   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
13084     Ops.append(InVec.getNode()->op_begin(),
13085                InVec.getNode()->op_end());
13086   } else if (InVec.isUndef()) {
13087     unsigned NElts = VT.getVectorNumElements();
13088     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
13089   } else {
13090     return SDValue();
13091   }
13092 
13093   // Insert the element
13094   if (Elt < Ops.size()) {
13095     // All the operands of BUILD_VECTOR must have the same type;
13096     // we enforce that here.
13097     EVT OpVT = Ops[0].getValueType();
13098     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
13099   }
13100 
13101   // Return the new vector
13102   return DAG.getBuildVector(VT, DL, Ops);
13103 }
13104 
13105 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
13106     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
13107   assert(!OriginalLoad->isVolatile());
13108 
13109   EVT ResultVT = EVE->getValueType(0);
13110   EVT VecEltVT = InVecVT.getVectorElementType();
13111   unsigned Align = OriginalLoad->getAlignment();
13112   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
13113       VecEltVT.getTypeForEVT(*DAG.getContext()));
13114 
13115   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13116     return SDValue();
13117 
13118   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
13119     ISD::NON_EXTLOAD : ISD::EXTLOAD;
13120   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
13121     return SDValue();
13122 
13123   Align = NewAlign;
13124 
13125   SDValue NewPtr = OriginalLoad->getBasePtr();
13126   SDValue Offset;
13127   EVT PtrType = NewPtr.getValueType();
13128   MachinePointerInfo MPI;
13129   SDLoc DL(EVE);
13130   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
13131     int Elt = ConstEltNo->getZExtValue();
13132     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
13133     Offset = DAG.getConstant(PtrOff, DL, PtrType);
13134     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
13135   } else {
13136     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
13137     Offset = DAG.getNode(
13138         ISD::MUL, DL, PtrType, Offset,
13139         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
13140     MPI = OriginalLoad->getPointerInfo();
13141   }
13142   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
13143 
13144   // The replacement we need to do here is a little tricky: we need to
13145   // replace an extractelement of a load with a load.
13146   // Use ReplaceAllUsesOfValuesWith to do the replacement.
13147   // Note that this replacement assumes that the extractvalue is the only
13148   // use of the load; that's okay because we don't want to perform this
13149   // transformation in other cases anyway.
13150   SDValue Load;
13151   SDValue Chain;
13152   if (ResultVT.bitsGT(VecEltVT)) {
13153     // If the result type of vextract is wider than the load, then issue an
13154     // extending load instead.
13155     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
13156                                                   VecEltVT)
13157                                    ? ISD::ZEXTLOAD
13158                                    : ISD::EXTLOAD;
13159     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
13160                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
13161                           Align, OriginalLoad->getMemOperand()->getFlags(),
13162                           OriginalLoad->getAAInfo());
13163     Chain = Load.getValue(1);
13164   } else {
13165     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
13166                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
13167                        OriginalLoad->getAAInfo());
13168     Chain = Load.getValue(1);
13169     if (ResultVT.bitsLT(VecEltVT))
13170       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
13171     else
13172       Load = DAG.getBitcast(ResultVT, Load);
13173   }
13174   WorklistRemover DeadNodes(*this);
13175   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
13176   SDValue To[] = { Load, Chain };
13177   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
13178   // Since we're explicitly calling ReplaceAllUses, add the new node to the
13179   // worklist explicitly as well.
13180   AddToWorklist(Load.getNode());
13181   AddUsersToWorklist(Load.getNode()); // Add users too
13182   // Make sure to revisit this node to clean it up; it will usually be dead.
13183   AddToWorklist(EVE);
13184   ++OpsNarrowed;
13185   return SDValue(EVE, 0);
13186 }
13187 
13188 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
13189   // (vextract (scalar_to_vector val, 0) -> val
13190   SDValue InVec = N->getOperand(0);
13191   EVT VT = InVec.getValueType();
13192   EVT NVT = N->getValueType(0);
13193 
13194   if (InVec.isUndef())
13195     return DAG.getUNDEF(NVT);
13196 
13197   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
13198     // Check if the result type doesn't match the inserted element type. A
13199     // SCALAR_TO_VECTOR may truncate the inserted element and the
13200     // EXTRACT_VECTOR_ELT may widen the extracted vector.
13201     SDValue InOp = InVec.getOperand(0);
13202     if (InOp.getValueType() != NVT) {
13203       assert(InOp.getValueType().isInteger() && NVT.isInteger());
13204       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
13205     }
13206     return InOp;
13207   }
13208 
13209   SDValue EltNo = N->getOperand(1);
13210   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
13211 
13212   // extract_vector_elt (build_vector x, y), 1 -> y
13213   if (ConstEltNo &&
13214       InVec.getOpcode() == ISD::BUILD_VECTOR &&
13215       TLI.isTypeLegal(VT) &&
13216       (InVec.hasOneUse() ||
13217        TLI.aggressivelyPreferBuildVectorSources(VT))) {
13218     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
13219     EVT InEltVT = Elt.getValueType();
13220 
13221     // Sometimes build_vector's scalar input types do not match result type.
13222     if (NVT == InEltVT)
13223       return Elt;
13224 
13225     // TODO: It may be useful to truncate if free if the build_vector implicitly
13226     // converts.
13227   }
13228 
13229   // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
13230   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
13231       ConstEltNo->isNullValue() && VT.isInteger()) {
13232     SDValue BCSrc = InVec.getOperand(0);
13233     if (BCSrc.getValueType().isScalarInteger())
13234       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
13235   }
13236 
13237   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
13238   //
13239   // This only really matters if the index is non-constant since other combines
13240   // on the constant elements already work.
13241   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
13242       EltNo == InVec.getOperand(2)) {
13243     SDValue Elt = InVec.getOperand(1);
13244     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
13245   }
13246 
13247   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
13248   // We only perform this optimization before the op legalization phase because
13249   // we may introduce new vector instructions which are not backed by TD
13250   // patterns. For example on AVX, extracting elements from a wide vector
13251   // without using extract_subvector. However, if we can find an underlying
13252   // scalar value, then we can always use that.
13253   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
13254     int NumElem = VT.getVectorNumElements();
13255     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
13256     // Find the new index to extract from.
13257     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
13258 
13259     // Extracting an undef index is undef.
13260     if (OrigElt == -1)
13261       return DAG.getUNDEF(NVT);
13262 
13263     // Select the right vector half to extract from.
13264     SDValue SVInVec;
13265     if (OrigElt < NumElem) {
13266       SVInVec = InVec->getOperand(0);
13267     } else {
13268       SVInVec = InVec->getOperand(1);
13269       OrigElt -= NumElem;
13270     }
13271 
13272     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
13273       SDValue InOp = SVInVec.getOperand(OrigElt);
13274       if (InOp.getValueType() != NVT) {
13275         assert(InOp.getValueType().isInteger() && NVT.isInteger());
13276         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
13277       }
13278 
13279       return InOp;
13280     }
13281 
13282     // FIXME: We should handle recursing on other vector shuffles and
13283     // scalar_to_vector here as well.
13284 
13285     if (!LegalOperations) {
13286       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13287       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
13288                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
13289     }
13290   }
13291 
13292   bool BCNumEltsChanged = false;
13293   EVT ExtVT = VT.getVectorElementType();
13294   EVT LVT = ExtVT;
13295 
13296   // If the result of load has to be truncated, then it's not necessarily
13297   // profitable.
13298   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
13299     return SDValue();
13300 
13301   if (InVec.getOpcode() == ISD::BITCAST) {
13302     // Don't duplicate a load with other uses.
13303     if (!InVec.hasOneUse())
13304       return SDValue();
13305 
13306     EVT BCVT = InVec.getOperand(0).getValueType();
13307     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
13308       return SDValue();
13309     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
13310       BCNumEltsChanged = true;
13311     InVec = InVec.getOperand(0);
13312     ExtVT = BCVT.getVectorElementType();
13313   }
13314 
13315   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
13316   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
13317       ISD::isNormalLoad(InVec.getNode()) &&
13318       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
13319     SDValue Index = N->getOperand(1);
13320     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
13321       if (!OrigLoad->isVolatile()) {
13322         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
13323                                                              OrigLoad);
13324       }
13325     }
13326   }
13327 
13328   // Perform only after legalization to ensure build_vector / vector_shuffle
13329   // optimizations have already been done.
13330   if (!LegalOperations) return SDValue();
13331 
13332   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
13333   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
13334   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
13335 
13336   if (ConstEltNo) {
13337     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13338 
13339     LoadSDNode *LN0 = nullptr;
13340     const ShuffleVectorSDNode *SVN = nullptr;
13341     if (ISD::isNormalLoad(InVec.getNode())) {
13342       LN0 = cast<LoadSDNode>(InVec);
13343     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13344                InVec.getOperand(0).getValueType() == ExtVT &&
13345                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
13346       // Don't duplicate a load with other uses.
13347       if (!InVec.hasOneUse())
13348         return SDValue();
13349 
13350       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
13351     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
13352       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
13353       // =>
13354       // (load $addr+1*size)
13355 
13356       // Don't duplicate a load with other uses.
13357       if (!InVec.hasOneUse())
13358         return SDValue();
13359 
13360       // If the bit convert changed the number of elements, it is unsafe
13361       // to examine the mask.
13362       if (BCNumEltsChanged)
13363         return SDValue();
13364 
13365       // Select the input vector, guarding against out of range extract vector.
13366       unsigned NumElems = VT.getVectorNumElements();
13367       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
13368       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
13369 
13370       if (InVec.getOpcode() == ISD::BITCAST) {
13371         // Don't duplicate a load with other uses.
13372         if (!InVec.hasOneUse())
13373           return SDValue();
13374 
13375         InVec = InVec.getOperand(0);
13376       }
13377       if (ISD::isNormalLoad(InVec.getNode())) {
13378         LN0 = cast<LoadSDNode>(InVec);
13379         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
13380         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
13381       }
13382     }
13383 
13384     // Make sure we found a non-volatile load and the extractelement is
13385     // the only use.
13386     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
13387       return SDValue();
13388 
13389     // If Idx was -1 above, Elt is going to be -1, so just return undef.
13390     if (Elt == -1)
13391       return DAG.getUNDEF(LVT);
13392 
13393     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
13394   }
13395 
13396   return SDValue();
13397 }
13398 
13399 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
13400 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
13401   // We perform this optimization post type-legalization because
13402   // the type-legalizer often scalarizes integer-promoted vectors.
13403   // Performing this optimization before may create bit-casts which
13404   // will be type-legalized to complex code sequences.
13405   // We perform this optimization only before the operation legalizer because we
13406   // may introduce illegal operations.
13407   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
13408     return SDValue();
13409 
13410   unsigned NumInScalars = N->getNumOperands();
13411   SDLoc DL(N);
13412   EVT VT = N->getValueType(0);
13413 
13414   // Check to see if this is a BUILD_VECTOR of a bunch of values
13415   // which come from any_extend or zero_extend nodes. If so, we can create
13416   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
13417   // optimizations. We do not handle sign-extend because we can't fill the sign
13418   // using shuffles.
13419   EVT SourceType = MVT::Other;
13420   bool AllAnyExt = true;
13421 
13422   for (unsigned i = 0; i != NumInScalars; ++i) {
13423     SDValue In = N->getOperand(i);
13424     // Ignore undef inputs.
13425     if (In.isUndef()) continue;
13426 
13427     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
13428     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
13429 
13430     // Abort if the element is not an extension.
13431     if (!ZeroExt && !AnyExt) {
13432       SourceType = MVT::Other;
13433       break;
13434     }
13435 
13436     // The input is a ZeroExt or AnyExt. Check the original type.
13437     EVT InTy = In.getOperand(0).getValueType();
13438 
13439     // Check that all of the widened source types are the same.
13440     if (SourceType == MVT::Other)
13441       // First time.
13442       SourceType = InTy;
13443     else if (InTy != SourceType) {
13444       // Multiple income types. Abort.
13445       SourceType = MVT::Other;
13446       break;
13447     }
13448 
13449     // Check if all of the extends are ANY_EXTENDs.
13450     AllAnyExt &= AnyExt;
13451   }
13452 
13453   // In order to have valid types, all of the inputs must be extended from the
13454   // same source type and all of the inputs must be any or zero extend.
13455   // Scalar sizes must be a power of two.
13456   EVT OutScalarTy = VT.getScalarType();
13457   bool ValidTypes = SourceType != MVT::Other &&
13458                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
13459                  isPowerOf2_32(SourceType.getSizeInBits());
13460 
13461   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
13462   // turn into a single shuffle instruction.
13463   if (!ValidTypes)
13464     return SDValue();
13465 
13466   bool isLE = DAG.getDataLayout().isLittleEndian();
13467   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
13468   assert(ElemRatio > 1 && "Invalid element size ratio");
13469   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
13470                                DAG.getConstant(0, DL, SourceType);
13471 
13472   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
13473   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
13474 
13475   // Populate the new build_vector
13476   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13477     SDValue Cast = N->getOperand(i);
13478     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
13479             Cast.getOpcode() == ISD::ZERO_EXTEND ||
13480             Cast.isUndef()) && "Invalid cast opcode");
13481     SDValue In;
13482     if (Cast.isUndef())
13483       In = DAG.getUNDEF(SourceType);
13484     else
13485       In = Cast->getOperand(0);
13486     unsigned Index = isLE ? (i * ElemRatio) :
13487                             (i * ElemRatio + (ElemRatio - 1));
13488 
13489     assert(Index < Ops.size() && "Invalid index");
13490     Ops[Index] = In;
13491   }
13492 
13493   // The type of the new BUILD_VECTOR node.
13494   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
13495   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
13496          "Invalid vector size");
13497   // Check if the new vector type is legal.
13498   if (!isTypeLegal(VecVT)) return SDValue();
13499 
13500   // Make the new BUILD_VECTOR.
13501   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
13502 
13503   // The new BUILD_VECTOR node has the potential to be further optimized.
13504   AddToWorklist(BV.getNode());
13505   // Bitcast to the desired type.
13506   return DAG.getBitcast(VT, BV);
13507 }
13508 
13509 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
13510   EVT VT = N->getValueType(0);
13511 
13512   unsigned NumInScalars = N->getNumOperands();
13513   SDLoc DL(N);
13514 
13515   EVT SrcVT = MVT::Other;
13516   unsigned Opcode = ISD::DELETED_NODE;
13517   unsigned NumDefs = 0;
13518 
13519   for (unsigned i = 0; i != NumInScalars; ++i) {
13520     SDValue In = N->getOperand(i);
13521     unsigned Opc = In.getOpcode();
13522 
13523     if (Opc == ISD::UNDEF)
13524       continue;
13525 
13526     // If all scalar values are floats and converted from integers.
13527     if (Opcode == ISD::DELETED_NODE &&
13528         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
13529       Opcode = Opc;
13530     }
13531 
13532     if (Opc != Opcode)
13533       return SDValue();
13534 
13535     EVT InVT = In.getOperand(0).getValueType();
13536 
13537     // If all scalar values are typed differently, bail out. It's chosen to
13538     // simplify BUILD_VECTOR of integer types.
13539     if (SrcVT == MVT::Other)
13540       SrcVT = InVT;
13541     if (SrcVT != InVT)
13542       return SDValue();
13543     NumDefs++;
13544   }
13545 
13546   // If the vector has just one element defined, it's not worth to fold it into
13547   // a vectorized one.
13548   if (NumDefs < 2)
13549     return SDValue();
13550 
13551   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
13552          && "Should only handle conversion from integer to float.");
13553   assert(SrcVT != MVT::Other && "Cannot determine source type!");
13554 
13555   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
13556 
13557   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
13558     return SDValue();
13559 
13560   // Just because the floating-point vector type is legal does not necessarily
13561   // mean that the corresponding integer vector type is.
13562   if (!isTypeLegal(NVT))
13563     return SDValue();
13564 
13565   SmallVector<SDValue, 8> Opnds;
13566   for (unsigned i = 0; i != NumInScalars; ++i) {
13567     SDValue In = N->getOperand(i);
13568 
13569     if (In.isUndef())
13570       Opnds.push_back(DAG.getUNDEF(SrcVT));
13571     else
13572       Opnds.push_back(In.getOperand(0));
13573   }
13574   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
13575   AddToWorklist(BV.getNode());
13576 
13577   return DAG.getNode(Opcode, DL, VT, BV);
13578 }
13579 
13580 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
13581                                            ArrayRef<int> VectorMask,
13582                                            SDValue VecIn1, SDValue VecIn2,
13583                                            unsigned LeftIdx) {
13584   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13585   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
13586 
13587   EVT VT = N->getValueType(0);
13588   EVT InVT1 = VecIn1.getValueType();
13589   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
13590 
13591   unsigned Vec2Offset = InVT1.getVectorNumElements();
13592   unsigned NumElems = VT.getVectorNumElements();
13593   unsigned ShuffleNumElems = NumElems;
13594 
13595   // We can't generate a shuffle node with mismatched input and output types.
13596   // Try to make the types match the type of the output.
13597   if (InVT1 != VT || InVT2 != VT) {
13598     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
13599       // If the output vector length is a multiple of both input lengths,
13600       // we can concatenate them and pad the rest with undefs.
13601       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
13602       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
13603       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
13604       ConcatOps[0] = VecIn1;
13605       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
13606       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
13607       VecIn2 = SDValue();
13608     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
13609       if (!TLI.isExtractSubvectorCheap(VT, NumElems))
13610         return SDValue();
13611 
13612       if (!VecIn2.getNode()) {
13613         // If we only have one input vector, and it's twice the size of the
13614         // output, split it in two.
13615         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
13616                              DAG.getConstant(NumElems, DL, IdxTy));
13617         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
13618         // Since we now have shorter input vectors, adjust the offset of the
13619         // second vector's start.
13620         Vec2Offset = NumElems;
13621       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
13622         // VecIn1 is wider than the output, and we have another, possibly
13623         // smaller input. Pad the smaller input with undefs, shuffle at the
13624         // input vector width, and extract the output.
13625         // The shuffle type is different than VT, so check legality again.
13626         if (LegalOperations &&
13627             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
13628           return SDValue();
13629 
13630         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
13631         // lower it back into a BUILD_VECTOR. So if the inserted type is
13632         // illegal, don't even try.
13633         if (InVT1 != InVT2) {
13634           if (!TLI.isTypeLegal(InVT2))
13635             return SDValue();
13636           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
13637                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
13638         }
13639         ShuffleNumElems = NumElems * 2;
13640       } else {
13641         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
13642         // than VecIn1. We can't handle this for now - this case will disappear
13643         // when we start sorting the vectors by type.
13644         return SDValue();
13645       }
13646     } else {
13647       // TODO: Support cases where the length mismatch isn't exactly by a
13648       // factor of 2.
13649       // TODO: Move this check upwards, so that if we have bad type
13650       // mismatches, we don't create any DAG nodes.
13651       return SDValue();
13652     }
13653   }
13654 
13655   // Initialize mask to undef.
13656   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
13657 
13658   // Only need to run up to the number of elements actually used, not the
13659   // total number of elements in the shuffle - if we are shuffling a wider
13660   // vector, the high lanes should be set to undef.
13661   for (unsigned i = 0; i != NumElems; ++i) {
13662     if (VectorMask[i] <= 0)
13663       continue;
13664 
13665     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
13666     if (VectorMask[i] == (int)LeftIdx) {
13667       Mask[i] = ExtIndex;
13668     } else if (VectorMask[i] == (int)LeftIdx + 1) {
13669       Mask[i] = Vec2Offset + ExtIndex;
13670     }
13671   }
13672 
13673   // The type the input vectors may have changed above.
13674   InVT1 = VecIn1.getValueType();
13675 
13676   // If we already have a VecIn2, it should have the same type as VecIn1.
13677   // If we don't, get an undef/zero vector of the appropriate type.
13678   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
13679   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
13680 
13681   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
13682   if (ShuffleNumElems > NumElems)
13683     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
13684 
13685   return Shuffle;
13686 }
13687 
13688 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
13689 // operations. If the types of the vectors we're extracting from allow it,
13690 // turn this into a vector_shuffle node.
13691 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
13692   SDLoc DL(N);
13693   EVT VT = N->getValueType(0);
13694 
13695   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
13696   if (!isTypeLegal(VT))
13697     return SDValue();
13698 
13699   // May only combine to shuffle after legalize if shuffle is legal.
13700   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
13701     return SDValue();
13702 
13703   bool UsesZeroVector = false;
13704   unsigned NumElems = N->getNumOperands();
13705 
13706   // Record, for each element of the newly built vector, which input vector
13707   // that element comes from. -1 stands for undef, 0 for the zero vector,
13708   // and positive values for the input vectors.
13709   // VectorMask maps each element to its vector number, and VecIn maps vector
13710   // numbers to their initial SDValues.
13711 
13712   SmallVector<int, 8> VectorMask(NumElems, -1);
13713   SmallVector<SDValue, 8> VecIn;
13714   VecIn.push_back(SDValue());
13715 
13716   for (unsigned i = 0; i != NumElems; ++i) {
13717     SDValue Op = N->getOperand(i);
13718 
13719     if (Op.isUndef())
13720       continue;
13721 
13722     // See if we can use a blend with a zero vector.
13723     // TODO: Should we generalize this to a blend with an arbitrary constant
13724     // vector?
13725     if (isNullConstant(Op) || isNullFPConstant(Op)) {
13726       UsesZeroVector = true;
13727       VectorMask[i] = 0;
13728       continue;
13729     }
13730 
13731     // Not an undef or zero. If the input is something other than an
13732     // EXTRACT_VECTOR_ELT with a constant index, bail out.
13733     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13734         !isa<ConstantSDNode>(Op.getOperand(1)))
13735       return SDValue();
13736 
13737     SDValue ExtractedFromVec = Op.getOperand(0);
13738 
13739     // All inputs must have the same element type as the output.
13740     if (VT.getVectorElementType() !=
13741         ExtractedFromVec.getValueType().getVectorElementType())
13742       return SDValue();
13743 
13744     // Have we seen this input vector before?
13745     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
13746     // a map back from SDValues to numbers isn't worth it.
13747     unsigned Idx = std::distance(
13748         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
13749     if (Idx == VecIn.size())
13750       VecIn.push_back(ExtractedFromVec);
13751 
13752     VectorMask[i] = Idx;
13753   }
13754 
13755   // If we didn't find at least one input vector, bail out.
13756   if (VecIn.size() < 2)
13757     return SDValue();
13758 
13759   // TODO: We want to sort the vectors by descending length, so that adjacent
13760   // pairs have similar length, and the longer vector is always first in the
13761   // pair.
13762 
13763   // TODO: Should this fire if some of the input vectors has illegal type (like
13764   // it does now), or should we let legalization run its course first?
13765 
13766   // Shuffle phase:
13767   // Take pairs of vectors, and shuffle them so that the result has elements
13768   // from these vectors in the correct places.
13769   // For example, given:
13770   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
13771   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
13772   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
13773   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
13774   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
13775   // We will generate:
13776   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
13777   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
13778   SmallVector<SDValue, 4> Shuffles;
13779   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
13780     unsigned LeftIdx = 2 * In + 1;
13781     SDValue VecLeft = VecIn[LeftIdx];
13782     SDValue VecRight =
13783         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
13784 
13785     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
13786                                                 VecRight, LeftIdx))
13787       Shuffles.push_back(Shuffle);
13788     else
13789       return SDValue();
13790   }
13791 
13792   // If we need the zero vector as an "ingredient" in the blend tree, add it
13793   // to the list of shuffles.
13794   if (UsesZeroVector)
13795     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
13796                                       : DAG.getConstantFP(0.0, DL, VT));
13797 
13798   // If we only have one shuffle, we're done.
13799   if (Shuffles.size() == 1)
13800     return Shuffles[0];
13801 
13802   // Update the vector mask to point to the post-shuffle vectors.
13803   for (int &Vec : VectorMask)
13804     if (Vec == 0)
13805       Vec = Shuffles.size() - 1;
13806     else
13807       Vec = (Vec - 1) / 2;
13808 
13809   // More than one shuffle. Generate a binary tree of blends, e.g. if from
13810   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
13811   // generate:
13812   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
13813   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
13814   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
13815   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
13816   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
13817   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
13818   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
13819 
13820   // Make sure the initial size of the shuffle list is even.
13821   if (Shuffles.size() % 2)
13822     Shuffles.push_back(DAG.getUNDEF(VT));
13823 
13824   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
13825     if (CurSize % 2) {
13826       Shuffles[CurSize] = DAG.getUNDEF(VT);
13827       CurSize++;
13828     }
13829     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
13830       int Left = 2 * In;
13831       int Right = 2 * In + 1;
13832       SmallVector<int, 8> Mask(NumElems, -1);
13833       for (unsigned i = 0; i != NumElems; ++i) {
13834         if (VectorMask[i] == Left) {
13835           Mask[i] = i;
13836           VectorMask[i] = In;
13837         } else if (VectorMask[i] == Right) {
13838           Mask[i] = i + NumElems;
13839           VectorMask[i] = In;
13840         }
13841       }
13842 
13843       Shuffles[In] =
13844           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
13845     }
13846   }
13847 
13848   return Shuffles[0];
13849 }
13850 
13851 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
13852   EVT VT = N->getValueType(0);
13853 
13854   // A vector built entirely of undefs is undef.
13855   if (ISD::allOperandsUndef(N))
13856     return DAG.getUNDEF(VT);
13857 
13858   // Check if we can express BUILD VECTOR via subvector extract.
13859   if (!LegalTypes && (N->getNumOperands() > 1)) {
13860     SDValue Op0 = N->getOperand(0);
13861     auto checkElem = [&](SDValue Op) -> uint64_t {
13862       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
13863           (Op0.getOperand(0) == Op.getOperand(0)))
13864         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
13865           return CNode->getZExtValue();
13866       return -1;
13867     };
13868 
13869     int Offset = checkElem(Op0);
13870     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
13871       if (Offset + i != checkElem(N->getOperand(i))) {
13872         Offset = -1;
13873         break;
13874       }
13875     }
13876 
13877     if ((Offset == 0) &&
13878         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
13879       return Op0.getOperand(0);
13880     if ((Offset != -1) &&
13881         ((Offset % N->getValueType(0).getVectorNumElements()) ==
13882          0)) // IDX must be multiple of output size.
13883       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
13884                          Op0.getOperand(0), Op0.getOperand(1));
13885   }
13886 
13887   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
13888     return V;
13889 
13890   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
13891     return V;
13892 
13893   if (SDValue V = reduceBuildVecToShuffle(N))
13894     return V;
13895 
13896   return SDValue();
13897 }
13898 
13899 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
13900   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13901   EVT OpVT = N->getOperand(0).getValueType();
13902 
13903   // If the operands are legal vectors, leave them alone.
13904   if (TLI.isTypeLegal(OpVT))
13905     return SDValue();
13906 
13907   SDLoc DL(N);
13908   EVT VT = N->getValueType(0);
13909   SmallVector<SDValue, 8> Ops;
13910 
13911   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
13912   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13913 
13914   // Keep track of what we encounter.
13915   bool AnyInteger = false;
13916   bool AnyFP = false;
13917   for (const SDValue &Op : N->ops()) {
13918     if (ISD::BITCAST == Op.getOpcode() &&
13919         !Op.getOperand(0).getValueType().isVector())
13920       Ops.push_back(Op.getOperand(0));
13921     else if (ISD::UNDEF == Op.getOpcode())
13922       Ops.push_back(ScalarUndef);
13923     else
13924       return SDValue();
13925 
13926     // Note whether we encounter an integer or floating point scalar.
13927     // If it's neither, bail out, it could be something weird like x86mmx.
13928     EVT LastOpVT = Ops.back().getValueType();
13929     if (LastOpVT.isFloatingPoint())
13930       AnyFP = true;
13931     else if (LastOpVT.isInteger())
13932       AnyInteger = true;
13933     else
13934       return SDValue();
13935   }
13936 
13937   // If any of the operands is a floating point scalar bitcast to a vector,
13938   // use floating point types throughout, and bitcast everything.
13939   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
13940   if (AnyFP) {
13941     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
13942     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13943     if (AnyInteger) {
13944       for (SDValue &Op : Ops) {
13945         if (Op.getValueType() == SVT)
13946           continue;
13947         if (Op.isUndef())
13948           Op = ScalarUndef;
13949         else
13950           Op = DAG.getBitcast(SVT, Op);
13951       }
13952     }
13953   }
13954 
13955   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
13956                                VT.getSizeInBits() / SVT.getSizeInBits());
13957   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
13958 }
13959 
13960 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
13961 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
13962 // most two distinct vectors the same size as the result, attempt to turn this
13963 // into a legal shuffle.
13964 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
13965   EVT VT = N->getValueType(0);
13966   EVT OpVT = N->getOperand(0).getValueType();
13967   int NumElts = VT.getVectorNumElements();
13968   int NumOpElts = OpVT.getVectorNumElements();
13969 
13970   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
13971   SmallVector<int, 8> Mask;
13972 
13973   for (SDValue Op : N->ops()) {
13974     // Peek through any bitcast.
13975     while (Op.getOpcode() == ISD::BITCAST)
13976       Op = Op.getOperand(0);
13977 
13978     // UNDEF nodes convert to UNDEF shuffle mask values.
13979     if (Op.isUndef()) {
13980       Mask.append((unsigned)NumOpElts, -1);
13981       continue;
13982     }
13983 
13984     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13985       return SDValue();
13986 
13987     // What vector are we extracting the subvector from and at what index?
13988     SDValue ExtVec = Op.getOperand(0);
13989 
13990     // We want the EVT of the original extraction to correctly scale the
13991     // extraction index.
13992     EVT ExtVT = ExtVec.getValueType();
13993 
13994     // Peek through any bitcast.
13995     while (ExtVec.getOpcode() == ISD::BITCAST)
13996       ExtVec = ExtVec.getOperand(0);
13997 
13998     // UNDEF nodes convert to UNDEF shuffle mask values.
13999     if (ExtVec.isUndef()) {
14000       Mask.append((unsigned)NumOpElts, -1);
14001       continue;
14002     }
14003 
14004     if (!isa<ConstantSDNode>(Op.getOperand(1)))
14005       return SDValue();
14006     int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
14007 
14008     // Ensure that we are extracting a subvector from a vector the same
14009     // size as the result.
14010     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
14011       return SDValue();
14012 
14013     // Scale the subvector index to account for any bitcast.
14014     int NumExtElts = ExtVT.getVectorNumElements();
14015     if (0 == (NumExtElts % NumElts))
14016       ExtIdx /= (NumExtElts / NumElts);
14017     else if (0 == (NumElts % NumExtElts))
14018       ExtIdx *= (NumElts / NumExtElts);
14019     else
14020       return SDValue();
14021 
14022     // At most we can reference 2 inputs in the final shuffle.
14023     if (SV0.isUndef() || SV0 == ExtVec) {
14024       SV0 = ExtVec;
14025       for (int i = 0; i != NumOpElts; ++i)
14026         Mask.push_back(i + ExtIdx);
14027     } else if (SV1.isUndef() || SV1 == ExtVec) {
14028       SV1 = ExtVec;
14029       for (int i = 0; i != NumOpElts; ++i)
14030         Mask.push_back(i + ExtIdx + NumElts);
14031     } else {
14032       return SDValue();
14033     }
14034   }
14035 
14036   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
14037     return SDValue();
14038 
14039   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
14040                               DAG.getBitcast(VT, SV1), Mask);
14041 }
14042 
/// Combine a CONCAT_VECTORS node.
///
/// The folds below are attempted in order; the first one that applies wins:
///  1. A single-operand concat is its operand.
///  2. A concat of only UNDEF operands is UNDEF.
///  3. concat(bitcast(scalar), undef...) becomes a bitcast of
///     SCALAR_TO_VECTOR, provided the types involved are legal.
///  4. A concat of only BUILD_VECTOR/UNDEF operands becomes one BUILD_VECTOR.
///  5. A concat of only bitcast scalars/UNDEF becomes a BUILD_VECTOR
///     (combineConcatVectorOfScalars).
///  6. A concat of EXTRACT_SUBVECTORs becomes a VECTOR_SHUFFLE
///     (combineConcatVectorOfExtracts), only before vector-op legalization
///     and only for a legal result type.
///  7. A concat that reassembles the pieces of one same-typed vector at their
///     original positions is that vector.
/// Returns an empty SDValue when none of them applies.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // Only integer and floating point scalars are handled.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // The result must hold at least two scalars of this type.
      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      // Both the intermediate vector type and the scalar type must be legal.
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      // The all-undef case returned above, so a BUILD_VECTOR must exist.
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    // Flatten every operand into the element list of the new BUILD_VECTOR.
    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An UNDEF operand expands to NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer elements are truncated to the common smallest type.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef operands are compatible with any source.
    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i of the concat occupies elements starting at i * PartNumElem.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
14196 
14197 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
14198   EVT NVT = N->getValueType(0);
14199   SDValue V = N->getOperand(0);
14200 
14201   // Extract from UNDEF is UNDEF.
14202   if (V.isUndef())
14203     return DAG.getUNDEF(NVT);
14204 
14205   // Combine:
14206   //    (extract_subvec (concat V1, V2, ...), i)
14207   // Into:
14208   //    Vi if possible
14209   // Only operand 0 is checked as 'concat' assumes all inputs of the same
14210   // type.
14211   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
14212       isa<ConstantSDNode>(N->getOperand(1)) &&
14213       V->getOperand(0).getValueType() == NVT) {
14214     unsigned Idx = N->getConstantOperandVal(1);
14215     unsigned NumElems = NVT.getVectorNumElements();
14216     assert((Idx % NumElems) == 0 &&
14217            "IDX in concat is not a multiple of the result vector length.");
14218     return V->getOperand(Idx / NumElems);
14219   }
14220 
14221   // Skip bitcasting
14222   if (V->getOpcode() == ISD::BITCAST)
14223     V = V.getOperand(0);
14224 
14225   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
14226     // Handle only simple case where vector being inserted and vector
14227     // being extracted are of same size.
14228     EVT SmallVT = V->getOperand(1).getValueType();
14229     if (!NVT.bitsEq(SmallVT))
14230       return SDValue();
14231 
14232     // Only handle cases where both indexes are constants.
14233     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
14234     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
14235 
14236     if (InsIdx && ExtIdx) {
14237       // Combine:
14238       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
14239       // Into:
14240       //    indices are equal or bit offsets are equal => V1
14241       //    otherwise => (extract_subvec V1, ExtIdx)
14242       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
14243           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
14244         return DAG.getBitcast(NVT, V->getOperand(1));
14245       return DAG.getNode(
14246           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
14247           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
14248           N->getOperand(1));
14249     }
14250   }
14251 
14252   return SDValue();
14253 }
14254 
14255 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
14256                                                  SDValue V, SelectionDAG &DAG) {
14257   SDLoc DL(V);
14258   EVT VT = V.getValueType();
14259 
14260   switch (V.getOpcode()) {
14261   default:
14262     return V;
14263 
14264   case ISD::CONCAT_VECTORS: {
14265     EVT OpVT = V->getOperand(0).getValueType();
14266     int OpSize = OpVT.getVectorNumElements();
14267     SmallBitVector OpUsedElements(OpSize, false);
14268     bool FoundSimplification = false;
14269     SmallVector<SDValue, 4> NewOps;
14270     NewOps.reserve(V->getNumOperands());
14271     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
14272       SDValue Op = V->getOperand(i);
14273       bool OpUsed = false;
14274       for (int j = 0; j < OpSize; ++j)
14275         if (UsedElements[i * OpSize + j]) {
14276           OpUsedElements[j] = true;
14277           OpUsed = true;
14278         }
14279       NewOps.push_back(
14280           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
14281                  : DAG.getUNDEF(OpVT));
14282       FoundSimplification |= Op == NewOps.back();
14283       OpUsedElements.reset();
14284     }
14285     if (FoundSimplification)
14286       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
14287     return V;
14288   }
14289 
14290   case ISD::INSERT_SUBVECTOR: {
14291     SDValue BaseV = V->getOperand(0);
14292     SDValue SubV = V->getOperand(1);
14293     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
14294     if (!IdxN)
14295       return V;
14296 
14297     int SubSize = SubV.getValueType().getVectorNumElements();
14298     int Idx = IdxN->getZExtValue();
14299     bool SubVectorUsed = false;
14300     SmallBitVector SubUsedElements(SubSize, false);
14301     for (int i = 0; i < SubSize; ++i)
14302       if (UsedElements[i + Idx]) {
14303         SubVectorUsed = true;
14304         SubUsedElements[i] = true;
14305         UsedElements[i + Idx] = false;
14306       }
14307 
14308     // Now recurse on both the base and sub vectors.
14309     SDValue SimplifiedSubV =
14310         SubVectorUsed
14311             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
14312             : DAG.getUNDEF(SubV.getValueType());
14313     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
14314     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
14315       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
14316                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
14317     return V;
14318   }
14319   }
14320 }
14321 
14322 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
14323                                        SDValue N1, SelectionDAG &DAG) {
14324   EVT VT = SVN->getValueType(0);
14325   int NumElts = VT.getVectorNumElements();
14326   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
14327   for (int M : SVN->getMask())
14328     if (M >= 0 && M < NumElts)
14329       N0UsedElements[M] = true;
14330     else if (M >= NumElts)
14331       N1UsedElements[M - NumElts] = true;
14332 
14333   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
14334   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
14335   if (S0 == N0 && S1 == N1)
14336     return SDValue();
14337 
14338   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
14339 }
14340 
14341 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
14342 // or turn a shuffle of a single concat into simpler shuffle then concat.
14343 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
14344   EVT VT = N->getValueType(0);
14345   unsigned NumElts = VT.getVectorNumElements();
14346 
14347   SDValue N0 = N->getOperand(0);
14348   SDValue N1 = N->getOperand(1);
14349   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
14350 
14351   SmallVector<SDValue, 4> Ops;
14352   EVT ConcatVT = N0.getOperand(0).getValueType();
14353   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
14354   unsigned NumConcats = NumElts / NumElemsPerConcat;
14355 
14356   // Special case: shuffle(concat(A,B)) can be more efficiently represented
14357   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
14358   // half vector elements.
14359   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
14360       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
14361                   SVN->getMask().end(), [](int i) { return i == -1; })) {
14362     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
14363                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
14364     N1 = DAG.getUNDEF(ConcatVT);
14365     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
14366   }
14367 
14368   // Look at every vector that's inserted. We're looking for exact
14369   // subvector-sized copies from a concatenated vector
14370   for (unsigned I = 0; I != NumConcats; ++I) {
14371     // Make sure we're dealing with a copy.
14372     unsigned Begin = I * NumElemsPerConcat;
14373     bool AllUndef = true, NoUndef = true;
14374     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
14375       if (SVN->getMaskElt(J) >= 0)
14376         AllUndef = false;
14377       else
14378         NoUndef = false;
14379     }
14380 
14381     if (NoUndef) {
14382       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
14383         return SDValue();
14384 
14385       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
14386         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
14387           return SDValue();
14388 
14389       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
14390       if (FirstElt < N0.getNumOperands())
14391         Ops.push_back(N0.getOperand(FirstElt));
14392       else
14393         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
14394 
14395     } else if (AllUndef) {
14396       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
14397     } else { // Mixed with general masks and undefs, can't do optimization.
14398       return SDValue();
14399     }
14400   }
14401 
14402   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
14403 }
14404 
14405 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
14406 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
14407 //
14408 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
14409 // a simplification in some sense, but it isn't appropriate in general: some
14410 // BUILD_VECTORs are substantially cheaper than others. The general case
14411 // of a BUILD_VECTOR requires inserting each element individually (or
14412 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
14413 // all constants is a single constant pool load.  A BUILD_VECTOR where each
14414 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
14415 // are undef lowers to a small number of element insertions.
14416 //
14417 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
14418 // We don't fold shuffles where one side is a non-zero constant, and we don't
14419 // fold shuffles if the resulting BUILD_VECTOR would have duplicate
14420 // non-constant operands. This seems to work out reasonably well in practice.
14421 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
14422                                        SelectionDAG &DAG,
14423                                        const TargetLowering &TLI) {
14424   EVT VT = SVN->getValueType(0);
14425   unsigned NumElts = VT.getVectorNumElements();
14426   SDValue N0 = SVN->getOperand(0);
14427   SDValue N1 = SVN->getOperand(1);
14428 
14429   if (!N0->hasOneUse() || !N1->hasOneUse())
14430     return SDValue();
14431   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
14432   // discussed above.
14433   if (!N1.isUndef()) {
14434     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
14435     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
14436     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
14437       return SDValue();
14438     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
14439       return SDValue();
14440   }
14441 
14442   SmallVector<SDValue, 8> Ops;
14443   SmallSet<SDValue, 16> DuplicateOps;
14444   for (int M : SVN->getMask()) {
14445     SDValue Op = DAG.getUNDEF(VT.getScalarType());
14446     if (M >= 0) {
14447       int Idx = M < (int)NumElts ? M : M - NumElts;
14448       SDValue &S = (M < (int)NumElts ? N0 : N1);
14449       if (S.getOpcode() == ISD::BUILD_VECTOR) {
14450         Op = S.getOperand(Idx);
14451       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
14452         if (Idx == 0)
14453           Op = S.getOperand(0);
14454       } else {
14455         // Operand can't be combined - bail out.
14456         return SDValue();
14457       }
14458     }
14459 
14460     // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
14461     // fine, but it's likely to generate low-quality code if the target can't
14462     // reconstruct an appropriate shuffle.
14463     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
14464       if (!DuplicateOps.insert(Op).second)
14465         return SDValue();
14466 
14467     Ops.push_back(Op);
14468   }
14469   // BUILD_VECTOR requires all inputs to be of the same type, find the
14470   // maximum type and extend them all.
14471   EVT SVT = VT.getScalarType();
14472   if (SVT.isInteger())
14473     for (SDValue &Op : Ops)
14474       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
14475   if (SVT != VT.getScalarType())
14476     for (SDValue &Op : Ops)
14477       Op = TLI.isZExtFree(Op.getValueType(), SVT)
14478                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
14479                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
14480   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
14481 }
14482 
14483 // Match shuffles that can be converted to any_vector_extend_in_reg.
14484 // This is often generated during legalization.
14485 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
14486 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
14487 SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
14488                                      SelectionDAG &DAG,
14489                                      const TargetLowering &TLI,
14490                                      bool LegalOperations) {
14491   EVT VT = SVN->getValueType(0);
14492   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
14493 
14494   // TODO Add support for big-endian when we have a test case.
14495   if (!VT.isInteger() || IsBigEndian)
14496     return SDValue();
14497 
14498   unsigned NumElts = VT.getVectorNumElements();
14499   unsigned EltSizeInBits = VT.getScalarSizeInBits();
14500   ArrayRef<int> Mask = SVN->getMask();
14501   SDValue N0 = SVN->getOperand(0);
14502 
14503   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
14504   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
14505     for (unsigned i = 0; i != NumElts; ++i) {
14506       if (Mask[i] < 0)
14507         continue;
14508       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
14509         continue;
14510       return false;
14511     }
14512     return true;
14513   };
14514 
14515   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
14516   // power-of-2 extensions as they are the most likely.
14517   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
14518     if (!isAnyExtend(Scale))
14519       continue;
14520 
14521     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
14522     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
14523     if (!LegalOperations ||
14524         TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
14525       return DAG.getBitcast(VT,
14526                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
14527   }
14528 
14529   return SDValue();
14530 }
14531 
14532 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
14533 // each source element of a large type into the lowest elements of a smaller
14534 // destination type. This is often generated during legalization.
14535 // If the source node itself was a '*_extend_vector_inreg' node then we should
14536 // then be able to remove it.
14537 SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
14538   EVT VT = SVN->getValueType(0);
14539   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
14540 
14541   // TODO Add support for big-endian when we have a test case.
14542   if (!VT.isInteger() || IsBigEndian)
14543     return SDValue();
14544 
14545   SDValue N0 = SVN->getOperand(0);
14546   while (N0.getOpcode() == ISD::BITCAST)
14547     N0 = N0.getOperand(0);
14548 
14549   unsigned Opcode = N0.getOpcode();
14550   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
14551       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
14552       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
14553     return SDValue();
14554 
14555   SDValue N00 = N0.getOperand(0);
14556   ArrayRef<int> Mask = SVN->getMask();
14557   unsigned NumElts = VT.getVectorNumElements();
14558   unsigned EltSizeInBits = VT.getScalarSizeInBits();
14559   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
14560 
14561   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
14562   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
14563   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
14564   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
14565     for (unsigned i = 0; i != NumElts; ++i) {
14566       if (Mask[i] < 0)
14567         continue;
14568       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
14569         continue;
14570       return false;
14571     }
14572     return true;
14573   };
14574 
14575   // At the moment we just handle the case where we've truncated back to the
14576   // same size as before the extension.
14577   // TODO: handle more extension/truncation cases as cases arise.
14578   if (EltSizeInBits != ExtSrcSizeInBits)
14579     return SDValue();
14580 
14581   // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
14582   // power-of-2 truncations as they are the most likely.
14583   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
14584     if (isTruncate(Scale))
14585       return DAG.getBitcast(VT, N00);
14586 
14587   return SDValue();
14588 }
14589 
/// Combine a VECTOR_SHUFFLE node.
///
/// The combines below are attempted in order and the first one that produces
/// a value wins: undef / duplicated-operand canonicalization, simplification
/// of a splat of a BUILD_VECTOR, operand simplification, matching of
/// extend / truncate style shuffles, partitioning a shuffle of concats,
/// merging a shuffle of scalar sources, merging with an inner shuffle that is
/// hidden behind bitcasts, and finally folding a shuffle of a shuffle into a
/// single shuffle.
///
/// \returns the replacement value, or an empty SDValue if nothing was changed.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Both operands are the same vector, so an index into the second
      // operand is redirected to the same lane of the first operand.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    // Only build a new node if at least one mask element was rewritten.
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      // Base becomes the first non-undef operand of the BUILD_VECTOR, if any.
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // There are various patterns used to build up a vector from smaller vectors,
  // subvectors, or elements. Scan chains of these and replace unused insertions
  // or components with undef.
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
    return S;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  // Shuffle of one concat (with undef) or of two concats whose pieces have the
  // same type: try to partition it, but only before vector ops are legalized.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Widen a mask by Scale: each element M is replaced by the Scale
    // consecutive elements Scale*M .. Scale*M+Scale-1; undef stays undef.
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must be whole multiples of the smaller scalar size
      // so that each mask can be rescaled exactly.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {

        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    // SV0 / SV1 are filled in lazily with the (at most two) distinct source
    // vectors that the combined mask ends up referencing.
    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    // Any operand slot that was never assigned is filled with undef.
    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}
14924 
14925 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
14926   SDValue InVal = N->getOperand(0);
14927   EVT VT = N->getValueType(0);
14928 
14929   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
14930   // with a VECTOR_SHUFFLE.
14931   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14932     SDValue InVec = InVal->getOperand(0);
14933     SDValue EltNo = InVal->getOperand(1);
14934 
14935     // FIXME: We could support implicit truncation if the shuffle can be
14936     // scaled to a smaller vector scalar type.
14937     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
14938     if (C0 && VT == InVec.getValueType() &&
14939         VT.getScalarType() == InVal.getValueType()) {
14940       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
14941       int Elt = C0->getZExtValue();
14942       NewMask[0] = Elt;
14943 
14944       if (TLI.isShuffleMaskLegal(NewMask, VT))
14945         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
14946                                     NewMask);
14947     }
14948   }
14949 
14950   return SDValue();
14951 }
14952 
14953 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
14954   EVT VT = N->getValueType(0);
14955   SDValue N0 = N->getOperand(0);
14956   SDValue N1 = N->getOperand(1);
14957   SDValue N2 = N->getOperand(2);
14958 
14959   // If inserting an UNDEF, just return the original vector.
14960   if (N1.isUndef())
14961     return N0;
14962 
14963   // If this is an insert of an extracted vector into an undef vector, we can
14964   // just use the input to the extract.
14965   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
14966       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
14967     return N1.getOperand(0);
14968 
14969   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
14970   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
14971   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
14972   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
14973       N0.getOperand(1).getValueType() == N1.getValueType() &&
14974       N0.getOperand(2) == N2)
14975     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
14976                        N1, N2);
14977 
14978   if (!isa<ConstantSDNode>(N2))
14979     return SDValue();
14980 
14981   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
14982 
14983   // Canonicalize insert_subvector dag nodes.
14984   // Example:
14985   // (insert_subvector (insert_subvector A, Idx0), Idx1)
14986   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
14987   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
14988       N1.getValueType() == N0.getOperand(1).getValueType() &&
14989       isa<ConstantSDNode>(N0.getOperand(2))) {
14990     unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
14991     if (InsIdx < OtherIdx) {
14992       // Swap nodes.
14993       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
14994                                   N0.getOperand(0), N1, N2);
14995       AddToWorklist(NewOp.getNode());
14996       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
14997                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
14998     }
14999   }
15000 
15001   // If the input vector is a concatenation, and the insert replaces
15002   // one of the pieces, we can optimize into a single concat_vectors.
15003   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
15004       N0.getOperand(0).getValueType() == N1.getValueType()) {
15005     unsigned Factor = N1.getValueType().getVectorNumElements();
15006 
15007     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
15008     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
15009 
15010     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
15011   }
15012 
15013   return SDValue();
15014 }
15015 
15016 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
15017   SDValue N0 = N->getOperand(0);
15018 
15019   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
15020   if (N0->getOpcode() == ISD::FP16_TO_FP)
15021     return N0->getOperand(0);
15022 
15023   return SDValue();
15024 }
15025 
15026 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
15027   SDValue N0 = N->getOperand(0);
15028 
15029   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
15030   if (N0->getOpcode() == ISD::AND) {
15031     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
15032     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
15033       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
15034                          N0.getOperand(0));
15035     }
15036   }
15037 
15038   return SDValue();
15039 }
15040 
15041 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
15042 /// with the destination vector and a zero vector.
15043 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
15044 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
15045 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
15046   EVT VT = N->getValueType(0);
15047   SDValue LHS = N->getOperand(0);
15048   SDValue RHS = N->getOperand(1);
15049   SDLoc DL(N);
15050 
15051   // Make sure we're not running after operation legalization where it
15052   // may have custom lowered the vector shuffles.
15053   if (LegalOperations)
15054     return SDValue();
15055 
15056   if (N->getOpcode() != ISD::AND)
15057     return SDValue();
15058 
15059   if (RHS.getOpcode() == ISD::BITCAST)
15060     RHS = RHS.getOperand(0);
15061 
15062   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
15063     return SDValue();
15064 
15065   EVT RVT = RHS.getValueType();
15066   unsigned NumElts = RHS.getNumOperands();
15067 
15068   // Attempt to create a valid clear mask, splitting the mask into
15069   // sub elements and checking to see if each is
15070   // all zeros or all ones - suitable for shuffle masking.
15071   auto BuildClearMask = [&](int Split) {
15072     int NumSubElts = NumElts * Split;
15073     int NumSubBits = RVT.getScalarSizeInBits() / Split;
15074 
15075     SmallVector<int, 8> Indices;
15076     for (int i = 0; i != NumSubElts; ++i) {
15077       int EltIdx = i / Split;
15078       int SubIdx = i % Split;
15079       SDValue Elt = RHS.getOperand(EltIdx);
15080       if (Elt.isUndef()) {
15081         Indices.push_back(-1);
15082         continue;
15083       }
15084 
15085       APInt Bits;
15086       if (isa<ConstantSDNode>(Elt))
15087         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
15088       else if (isa<ConstantFPSDNode>(Elt))
15089         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
15090       else
15091         return SDValue();
15092 
15093       // Extract the sub element from the constant bit mask.
15094       if (DAG.getDataLayout().isBigEndian()) {
15095         Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
15096       } else {
15097         Bits = Bits.lshr(SubIdx * NumSubBits);
15098       }
15099 
15100       if (Split > 1)
15101         Bits = Bits.trunc(NumSubBits);
15102 
15103       if (Bits.isAllOnesValue())
15104         Indices.push_back(i);
15105       else if (Bits == 0)
15106         Indices.push_back(i + NumSubElts);
15107       else
15108         return SDValue();
15109     }
15110 
15111     // Let's see if the target supports this vector_shuffle.
15112     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
15113     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
15114     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
15115       return SDValue();
15116 
15117     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
15118     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
15119                                                    DAG.getBitcast(ClearVT, LHS),
15120                                                    Zero, Indices));
15121   };
15122 
15123   // Determine maximum split level (byte level masking).
15124   int MaxSplit = 1;
15125   if (RVT.getScalarSizeInBits() % 8 == 0)
15126     MaxSplit = RVT.getScalarSizeInBits() / 8;
15127 
15128   for (int Split = 1; Split <= MaxSplit; ++Split)
15129     if (RVT.getScalarSizeInBits() % Split == 0)
15130       if (SDValue S = BuildClearMask(Split))
15131         return S;
15132 
15133   return SDValue();
15134 }
15135 
15136 /// Visit a binary vector operation, like ADD.
15137 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
15138   assert(N->getValueType(0).isVector() &&
15139          "SimplifyVBinOp only works on vectors!");
15140 
15141   SDValue LHS = N->getOperand(0);
15142   SDValue RHS = N->getOperand(1);
15143   SDValue Ops[] = {LHS, RHS};
15144 
15145   // See if we can constant fold the vector operation.
15146   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
15147           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
15148     return Fold;
15149 
15150   // Try to convert a constant mask AND into a shuffle clear mask.
15151   if (SDValue Shuffle = XformToShuffleWithZero(N))
15152     return Shuffle;
15153 
15154   // Type legalization might introduce new shuffles in the DAG.
15155   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
15156   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
15157   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
15158       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
15159       LHS.getOperand(1).isUndef() &&
15160       RHS.getOperand(1).isUndef()) {
15161     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
15162     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
15163 
15164     if (SVN0->getMask().equals(SVN1->getMask())) {
15165       EVT VT = N->getValueType(0);
15166       SDValue UndefVector = LHS.getOperand(1);
15167       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
15168                                      LHS.getOperand(0), RHS.getOperand(0),
15169                                      N->getFlags());
15170       AddUsersToWorklist(N);
15171       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
15172                                   SVN0->getMask());
15173     }
15174   }
15175 
15176   return SDValue();
15177 }
15178 
15179 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
15180                                     SDValue N2) {
15181   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
15182 
15183   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
15184                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15185 
15186   // If we got a simplified select_cc node back from SimplifySelectCC, then
15187   // break it down into a new SETCC node, and a new SELECT node, and then return
15188   // the SELECT node, since we were called with a SELECT node.
15189   if (SCC.getNode()) {
15190     // Check to see if we got a select_cc back (to turn into setcc/select).
15191     // Otherwise, just return whatever node we got back, like fabs.
15192     if (SCC.getOpcode() == ISD::SELECT_CC) {
15193       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
15194                                   N0.getValueType(),
15195                                   SCC.getOperand(0), SCC.getOperand(1),
15196                                   SCC.getOperand(4));
15197       AddToWorklist(SETCC.getNode());
15198       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
15199                            SCC.getOperand(2), SCC.getOperand(3));
15200     }
15201 
15202     return SCC;
15203   }
15204   return SDValue();
15205 }
15206 
15207 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
15208 /// being selected between, see if we can simplify the select.  Callers of this
15209 /// should assume that TheSelect is deleted if this returns true.  As such, they
15210 /// should return the appropriate thing (e.g. the node) back to the top-level of
15211 /// the DAG combiner loop to avoid it being looked at.
15212 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
15213                                     SDValue RHS) {
15214 
15215   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15216   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
15217   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
15218     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
15219       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
15220       SDValue Sqrt = RHS;
15221       ISD::CondCode CC;
15222       SDValue CmpLHS;
15223       const ConstantFPSDNode *Zero = nullptr;
15224 
15225       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
15226         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
15227         CmpLHS = TheSelect->getOperand(0);
15228         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
15229       } else {
15230         // SELECT or VSELECT
15231         SDValue Cmp = TheSelect->getOperand(0);
15232         if (Cmp.getOpcode() == ISD::SETCC) {
15233           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
15234           CmpLHS = Cmp.getOperand(0);
15235           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
15236         }
15237       }
15238       if (Zero && Zero->isZero() &&
15239           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
15240           CC == ISD::SETULT || CC == ISD::SETLT)) {
15241         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
15242         CombineTo(TheSelect, Sqrt);
15243         return true;
15244       }
15245     }
15246   }
15247   // Cannot simplify select with vector condition
15248   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
15249 
15250   // If this is a select from two identical things, try to pull the operation
15251   // through the select.
15252   if (LHS.getOpcode() != RHS.getOpcode() ||
15253       !LHS.hasOneUse() || !RHS.hasOneUse())
15254     return false;
15255 
15256   // If this is a load and the token chain is identical, replace the select
15257   // of two loads with a load through a select of the address to load from.
15258   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
15259   // constants have been dropped into the constant pool.
15260   if (LHS.getOpcode() == ISD::LOAD) {
15261     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
15262     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
15263 
15264     // Token chains must be identical.
15265     if (LHS.getOperand(0) != RHS.getOperand(0) ||
15266         // Do not let this transformation reduce the number of volatile loads.
15267         LLD->isVolatile() || RLD->isVolatile() ||
15268         // FIXME: If either is a pre/post inc/dec load,
15269         // we'd need to split out the address adjustment.
15270         LLD->isIndexed() || RLD->isIndexed() ||
15271         // If this is an EXTLOAD, the VT's must match.
15272         LLD->getMemoryVT() != RLD->getMemoryVT() ||
15273         // If this is an EXTLOAD, the kind of extension must match.
15274         (LLD->getExtensionType() != RLD->getExtensionType() &&
15275          // The only exception is if one of the extensions is anyext.
15276          LLD->getExtensionType() != ISD::EXTLOAD &&
15277          RLD->getExtensionType() != ISD::EXTLOAD) ||
15278         // FIXME: this discards src value information.  This is
15279         // over-conservative. It would be beneficial to be able to remember
15280         // both potential memory locations.  Since we are discarding
15281         // src value info, don't do the transformation if the memory
15282         // locations are not in the default address space.
15283         LLD->getPointerInfo().getAddrSpace() != 0 ||
15284         RLD->getPointerInfo().getAddrSpace() != 0 ||
15285         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
15286                                       LLD->getBasePtr().getValueType()))
15287       return false;
15288 
15289     // Check that the select condition doesn't reach either load.  If so,
15290     // folding this will induce a cycle into the DAG.  If not, this is safe to
15291     // xform, so create a select of the addresses.
15292     SDValue Addr;
15293     if (TheSelect->getOpcode() == ISD::SELECT) {
15294       SDNode *CondNode = TheSelect->getOperand(0).getNode();
15295       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
15296           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
15297         return false;
15298       // The loads must not depend on one another.
15299       if (LLD->isPredecessorOf(RLD) ||
15300           RLD->isPredecessorOf(LLD))
15301         return false;
15302       Addr = DAG.getSelect(SDLoc(TheSelect),
15303                            LLD->getBasePtr().getValueType(),
15304                            TheSelect->getOperand(0), LLD->getBasePtr(),
15305                            RLD->getBasePtr());
15306     } else {  // Otherwise SELECT_CC
15307       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
15308       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
15309 
15310       if ((LLD->hasAnyUseOfValue(1) &&
15311            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
15312           (RLD->hasAnyUseOfValue(1) &&
15313            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
15314         return false;
15315 
15316       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
15317                          LLD->getBasePtr().getValueType(),
15318                          TheSelect->getOperand(0),
15319                          TheSelect->getOperand(1),
15320                          LLD->getBasePtr(), RLD->getBasePtr(),
15321                          TheSelect->getOperand(4));
15322     }
15323 
15324     SDValue Load;
15325     // It is safe to replace the two loads if they have different alignments,
15326     // but the new load must be the minimum (most restrictive) alignment of the
15327     // inputs.
15328     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
15329     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
15330     if (!RLD->isInvariant())
15331       MMOFlags &= ~MachineMemOperand::MOInvariant;
15332     if (!RLD->isDereferenceable())
15333       MMOFlags &= ~MachineMemOperand::MODereferenceable;
15334     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
15335       // FIXME: Discards pointer and AA info.
15336       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
15337                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
15338                          MMOFlags);
15339     } else {
15340       // FIXME: Discards pointer and AA info.
15341       Load = DAG.getExtLoad(
15342           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
15343                                                   : LLD->getExtensionType(),
15344           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
15345           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
15346     }
15347 
15348     // Users of the select now use the result of the load.
15349     CombineTo(TheSelect, Load);
15350 
15351     // Users of the old loads now use the new load's chain.  We know the
15352     // old-load value is dead now.
15353     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
15354     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
15355     return true;
15356   }
15357 
15358   return false;
15359 }
15360 
15361 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
15362 /// bitwise 'and'.
15363 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
15364                                             SDValue N1, SDValue N2, SDValue N3,
15365                                             ISD::CondCode CC) {
15366   // If this is a select where the false operand is zero and the compare is a
15367   // check of the sign bit, see if we can perform the "gzip trick":
15368   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
15369   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
15370   EVT XType = N0.getValueType();
15371   EVT AType = N2.getValueType();
15372   if (!isNullConstant(N3) || !XType.bitsGE(AType))
15373     return SDValue();
15374 
15375   // If the comparison is testing for a positive value, we have to invert
15376   // the sign bit mask, so only do that transform if the target has a bitwise
15377   // 'and not' instruction (the invert is free).
15378   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
15379     // (X > -1) ? A : 0
15380     // (X >  0) ? X : 0 <-- This is canonical signed max.
15381     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
15382       return SDValue();
15383   } else if (CC == ISD::SETLT) {
15384     // (X <  0) ? A : 0
15385     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
15386     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
15387       return SDValue();
15388   } else {
15389     return SDValue();
15390   }
15391 
15392   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
15393   // constant.
15394   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
15395   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
15396   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
15397     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
15398     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
15399     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
15400     AddToWorklist(Shift.getNode());
15401 
15402     if (XType.bitsGT(AType)) {
15403       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15404       AddToWorklist(Shift.getNode());
15405     }
15406 
15407     if (CC == ISD::SETGT)
15408       Shift = DAG.getNOT(DL, Shift, AType);
15409 
15410     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
15411   }
15412 
15413   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
15414   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
15415   AddToWorklist(Shift.getNode());
15416 
15417   if (XType.bitsGT(AType)) {
15418     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
15419     AddToWorklist(Shift.getNode());
15420   }
15421 
15422   if (CC == ISD::SETGT)
15423     Shift = DAG.getNOT(DL, Shift, AType);
15424 
15425   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
15426 }
15427 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
///
/// N0/N1 are the compared operands, N2 is the value produced when the
/// comparison holds and N3 the value produced otherwise. The folds below are
/// tried in order and the first one that matches wins. When NotExtCompare is
/// set, the "select C, 1, 0" form is not turned into a zero-extended setcc.
/// Returns an empty SDValue if no simplification applies.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
  // where "tmp" is a constant pool entry containing an array with 2.0 and 1.0
  // in it (false value first).  This is a win when the constant is not
  // otherwise available because it replaces two constant pool loads with one.
  // We only do this if the FP type is known to be legal, because if it isn't,
  // then we are before legalize types and we want the other legalization to
  // happen first (e.g. to avoid messing with soft float) and if the ConstantFP
  // is not legal, because if it is legal, we may not need to store the FP
  // constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Element 0 holds the false value and element 1 the true value, which
        // matches the offset select below (true -> EltSize, false -> 0).
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        // Materialize the condition, pick the byte offset with it, and add the
        // offset to the constant pool address.
        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            Alignment);
      }
    }

  // Sign-bit tests with a zero false operand become a shift plus 'and'.
  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // Only valid when a true setcc result is exactly 1 for this operand type.
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        // Types are not yet required to be legal, so an i1 setcc can be used.
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      // select C, 1, 0 is just the extended compare result; no shift needed.
      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    // SubC is the left operand of the (sub C, X) arm; the pattern is a
    // negation only when that constant is zero.
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      // Note: this local DL (taken from N0) shadows the DL parameter for the
      // nodes built in this scope.
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  // (Here "sizeof(X)" is compared against VT.getSizeInBits(), i.e. a bit
  // count.)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  // No simplification applied.
  return SDValue();
}
15670 
15671 /// This is a stub for TargetLowering::SimplifySetCC.
15672 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
15673                                    ISD::CondCode Cond, const SDLoc &DL,
15674                                    bool foldBooleans) {
15675   TargetLowering::DAGCombinerInfo
15676     DagCombineInfo(DAG, Level, false, this);
15677   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
15678 }
15679 
15680 /// Given an ISD::SDIV node expressing a divide by constant, return
15681 /// a DAG expression to select that will generate the same value by multiplying
15682 /// by a magic number.
15683 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
15684 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
15685   // when optimising for minimum size, we don't want to expand a div to a mul
15686   // and a shift.
15687   if (DAG.getMachineFunction().getFunction()->optForMinSize())
15688     return SDValue();
15689 
15690   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
15691   if (!C)
15692     return SDValue();
15693 
15694   // Avoid division by zero.
15695   if (C->isNullValue())
15696     return SDValue();
15697 
15698   std::vector<SDNode*> Built;
15699   SDValue S =
15700       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
15701 
15702   for (SDNode *N : Built)
15703     AddToWorklist(N);
15704   return S;
15705 }
15706 
15707 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
15708 /// DAG expression that will generate the same value by right shifting.
15709 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
15710   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
15711   if (!C)
15712     return SDValue();
15713 
15714   // Avoid division by zero.
15715   if (C->isNullValue())
15716     return SDValue();
15717 
15718   std::vector<SDNode *> Built;
15719   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
15720 
15721   for (SDNode *N : Built)
15722     AddToWorklist(N);
15723   return S;
15724 }
15725 
15726 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
15727 /// expression that will generate the same value by multiplying by a magic
15728 /// number.
15729 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
15730 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
15731   // when optimising for minimum size, we don't want to expand a div to a mul
15732   // and a shift.
15733   if (DAG.getMachineFunction().getFunction()->optForMinSize())
15734     return SDValue();
15735 
15736   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
15737   if (!C)
15738     return SDValue();
15739 
15740   // Avoid division by zero.
15741   if (C->isNullValue())
15742     return SDValue();
15743 
15744   std::vector<SDNode*> Built;
15745   SDValue S =
15746       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
15747 
15748   for (SDNode *N : Built)
15749     AddToWorklist(N);
15750   return S;
15751 }
15752 
15753 /// Determines the LogBase2 value for a non-null input value using the
15754 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
15755 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
15756   EVT VT = V.getValueType();
15757   unsigned EltBits = VT.getScalarSizeInBits();
15758   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
15759   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
15760   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
15761   return LogBase2;
15762 }
15763 
15764 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15765 /// For the reciprocal, we need to find the zero of the function:
15766 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
15767 ///     =>
15768 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
15769 ///     does not require additional intermediate precision]
15770 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
15771   if (Level >= AfterLegalizeDAG)
15772     return SDValue();
15773 
15774   // TODO: Handle half and/or extended types?
15775   EVT VT = Op.getValueType();
15776   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
15777     return SDValue();
15778 
15779   // If estimates are explicitly disabled for this function, we're done.
15780   MachineFunction &MF = DAG.getMachineFunction();
15781   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
15782   if (Enabled == TLI.ReciprocalEstimate::Disabled)
15783     return SDValue();
15784 
15785   // Estimates may be explicitly enabled for this type with a custom number of
15786   // refinement steps.
15787   int Iterations = TLI.getDivRefinementSteps(VT, MF);
15788   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
15789     AddToWorklist(Est.getNode());
15790 
15791     if (Iterations) {
15792       EVT VT = Op.getValueType();
15793       SDLoc DL(Op);
15794       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
15795 
15796       // Newton iterations: Est = Est + Est (1 - Arg * Est)
15797       for (int i = 0; i < Iterations; ++i) {
15798         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
15799         AddToWorklist(NewEst.getNode());
15800 
15801         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
15802         AddToWorklist(NewEst.getNode());
15803 
15804         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
15805         AddToWorklist(NewEst.getNode());
15806 
15807         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
15808         AddToWorklist(Est.getNode());
15809       }
15810     }
15811     return Est;
15812   }
15813 
15814   return SDValue();
15815 }
15816 
15817 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15818 /// For the reciprocal sqrt, we need to find the zero of the function:
15819 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
15820 ///     =>
15821 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
15822 /// As a result, we precompute A/2 prior to the iteration loop.
15823 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
15824                                          unsigned Iterations,
15825                                          SDNodeFlags *Flags, bool Reciprocal) {
15826   EVT VT = Arg.getValueType();
15827   SDLoc DL(Arg);
15828   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
15829 
15830   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
15831   // this entire sequence requires only one FP constant.
15832   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
15833   AddToWorklist(HalfArg.getNode());
15834 
15835   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
15836   AddToWorklist(HalfArg.getNode());
15837 
15838   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
15839   for (unsigned i = 0; i < Iterations; ++i) {
15840     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
15841     AddToWorklist(NewEst.getNode());
15842 
15843     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
15844     AddToWorklist(NewEst.getNode());
15845 
15846     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
15847     AddToWorklist(NewEst.getNode());
15848 
15849     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
15850     AddToWorklist(Est.getNode());
15851   }
15852 
15853   // If non-reciprocal square root is requested, multiply the result by Arg.
15854   if (!Reciprocal) {
15855     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
15856     AddToWorklist(Est.getNode());
15857   }
15858 
15859   return Est;
15860 }
15861 
15862 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15863 /// For the reciprocal sqrt, we need to find the zero of the function:
15864 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
15865 ///     =>
15866 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
15867 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
15868                                          unsigned Iterations,
15869                                          SDNodeFlags *Flags, bool Reciprocal) {
15870   EVT VT = Arg.getValueType();
15871   SDLoc DL(Arg);
15872   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
15873   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
15874 
15875   // This routine must enter the loop below to work correctly
15876   // when (Reciprocal == false).
15877   assert(Iterations > 0);
15878 
15879   // Newton iterations for reciprocal square root:
15880   // E = (E * -0.5) * ((A * E) * E + -3.0)
15881   for (unsigned i = 0; i < Iterations; ++i) {
15882     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
15883     AddToWorklist(AE.getNode());
15884 
15885     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
15886     AddToWorklist(AEE.getNode());
15887 
15888     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
15889     AddToWorklist(RHS.getNode());
15890 
15891     // When calculating a square root at the last iteration build:
15892     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
15893     // (notice a common subexpression)
15894     SDValue LHS;
15895     if (Reciprocal || (i + 1) < Iterations) {
15896       // RSQRT: LHS = (E * -0.5)
15897       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
15898     } else {
15899       // SQRT: LHS = (A * E) * -0.5
15900       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
15901     }
15902     AddToWorklist(LHS.getNode());
15903 
15904     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
15905     AddToWorklist(Est.getNode());
15906   }
15907 
15908   return Est;
15909 }
15910 
15911 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
15912 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
15913 /// Op can be zero.
15914 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
15915                                            bool Reciprocal) {
15916   if (Level >= AfterLegalizeDAG)
15917     return SDValue();
15918 
15919   // TODO: Handle half and/or extended types?
15920   EVT VT = Op.getValueType();
15921   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
15922     return SDValue();
15923 
15924   // If estimates are explicitly disabled for this function, we're done.
15925   MachineFunction &MF = DAG.getMachineFunction();
15926   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
15927   if (Enabled == TLI.ReciprocalEstimate::Disabled)
15928     return SDValue();
15929 
15930   // Estimates may be explicitly enabled for this type with a custom number of
15931   // refinement steps.
15932   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
15933 
15934   bool UseOneConstNR = false;
15935   if (SDValue Est =
15936       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
15937                           Reciprocal)) {
15938     AddToWorklist(Est.getNode());
15939 
15940     if (Iterations) {
15941       Est = UseOneConstNR
15942             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
15943             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
15944 
15945       if (!Reciprocal) {
15946         // Unfortunately, Est is now NaN if the input was exactly 0.0.
15947         // Select out this case and force the answer to 0.0.
15948         EVT VT = Op.getValueType();
15949         SDLoc DL(Op);
15950 
15951         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
15952         EVT CCVT = getSetCCResultType(VT);
15953         SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
15954         AddToWorklist(ZeroCmp.getNode());
15955 
15956         Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
15957                           ZeroCmp, FPZero, Est);
15958         AddToWorklist(Est.getNode());
15959       }
15960     }
15961     return Est;
15962   }
15963 
15964   return SDValue();
15965 }
15966 
15967 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15968   return buildSqrtEstimateImpl(Op, Flags, true);
15969 }
15970 
15971 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15972   return buildSqrtEstimateImpl(Op, Flags, false);
15973 }
15974 
15975 /// Return true if base is a frame index, which is known not to alias with
15976 /// anything but itself.  Provides base object and offset as results.
15977 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
15978                            const GlobalValue *&GV, const void *&CV) {
15979   // Assume it is a primitive operation.
15980   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
15981 
15982   // If it's an adding a simple constant then integrate the offset.
15983   if (Base.getOpcode() == ISD::ADD) {
15984     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
15985       Base = Base.getOperand(0);
15986       Offset += C->getSExtValue();
15987     }
15988   }
15989 
15990   // Return the underlying GlobalValue, and update the Offset.  Return false
15991   // for GlobalAddressSDNode since the same GlobalAddress may be represented
15992   // by multiple nodes with different offsets.
15993   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
15994     GV = G->getGlobal();
15995     Offset += G->getOffset();
15996     return false;
15997   }
15998 
15999   // Return the underlying Constant value, and update the Offset.  Return false
16000   // for ConstantSDNodes since the same constant pool entry may be represented
16001   // by multiple nodes with different offsets.
16002   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
16003     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
16004                                          : (const void *)C->getConstVal();
16005     Offset += C->getOffset();
16006     return false;
16007   }
16008   // If it's any of the following then it can't alias with anything but itself.
16009   return isa<FrameIndexSDNode>(Base);
16010 }
16011 
16012 /// Return true if there is any possibility that the two addresses overlap.
16013 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
16014   // If they are the same then they must be aliases.
16015   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
16016 
16017   // If they are both volatile then they cannot be reordered.
16018   if (Op0->isVolatile() && Op1->isVolatile()) return true;
16019 
16020   // If one operation reads from invariant memory, and the other may store, they
16021   // cannot alias. These should really be checking the equivalent of mayWrite,
16022   // but it only matters for memory nodes other than load /store.
16023   if (Op0->isInvariant() && Op1->writeMem())
16024     return false;
16025 
16026   if (Op1->isInvariant() && Op0->writeMem())
16027     return false;
16028 
16029   // Gather base node and offset information.
16030   SDValue Base1, Base2;
16031   int64_t Offset1, Offset2;
16032   const GlobalValue *GV1, *GV2;
16033   const void *CV1, *CV2;
16034   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
16035                                       Base1, Offset1, GV1, CV1);
16036   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
16037                                       Base2, Offset2, GV2, CV2);
16038 
16039   // If they have a same base address then check to see if they overlap.
16040   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
16041     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
16042              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
16043 
16044   // It is possible for different frame indices to alias each other, mostly
16045   // when tail call optimization reuses return address slots for arguments.
16046   // To catch this case, look up the actual index of frame indices to compute
16047   // the real alias relationship.
16048   if (isFrameIndex1 && isFrameIndex2) {
16049     MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
16050     Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
16051     Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
16052     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
16053              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
16054   }
16055 
16056   // Otherwise, if we know what the bases are, and they aren't identical, then
16057   // we know they cannot alias.
16058   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
16059     return false;
16060 
16061   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
16062   // compared to the size and offset of the access, we may be able to prove they
16063   // do not alias.  This check is conservative for now to catch cases created by
16064   // splitting vector types.
16065   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
16066       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
16067       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
16068        Op1->getMemoryVT().getSizeInBits() >> 3) &&
16069       (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
16070     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
16071     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
16072 
16073     // There is no overlap between these relatively aligned accesses of similar
16074     // size, return no alias.
16075     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
16076         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
16077       return false;
16078   }
16079 
16080   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
16081                    ? CombinerGlobalAA
16082                    : DAG.getSubtarget().useAA();
16083 #ifndef NDEBUG
16084   if (CombinerAAOnlyFunc.getNumOccurrences() &&
16085       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
16086     UseAA = false;
16087 #endif
16088   if (UseAA &&
16089       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
16090     // Use alias analysis information.
16091     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
16092                                  Op1->getSrcValueOffset());
16093     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
16094         Op0->getSrcValueOffset() - MinOffset;
16095     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
16096         Op1->getSrcValueOffset() - MinOffset;
16097     AliasResult AAResult =
16098         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
16099                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
16100                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
16101                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
16102     if (AAResult == NoAlias)
16103       return false;
16104   }
16105 
16106   // Otherwise we have to assume they alias.
16107   return true;
16108 }
16109 
16110 /// Walk up chain skipping non-aliasing memory nodes,
16111 /// looking for aliasing nodes and adding them to the Aliases vector.
16112 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
16113                                    SmallVectorImpl<SDValue> &Aliases) {
16114   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
16115   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
16116 
16117   // Get alias information for node.
16118   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
16119 
16120   // Starting off.
16121   Chains.push_back(OriginalChain);
16122   unsigned Depth = 0;
16123 
16124   // Look at each chain and determine if it is an alias.  If so, add it to the
16125   // aliases list.  If not, then continue up the chain looking for the next
16126   // candidate.
16127   while (!Chains.empty()) {
16128     SDValue Chain = Chains.pop_back_val();
16129 
16130     // For TokenFactor nodes, look at each operand and only continue up the
16131     // chain until we reach the depth limit.
16132     //
16133     // FIXME: The depth check could be made to return the last non-aliasing
16134     // chain we found before we hit a tokenfactor rather than the original
16135     // chain.
16136     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
16137       Aliases.clear();
16138       Aliases.push_back(OriginalChain);
16139       return;
16140     }
16141 
16142     // Don't bother if we've been before.
16143     if (!Visited.insert(Chain.getNode()).second)
16144       continue;
16145 
16146     switch (Chain.getOpcode()) {
16147     case ISD::EntryToken:
16148       // Entry token is ideal chain operand, but handled in FindBetterChain.
16149       break;
16150 
16151     case ISD::LOAD:
16152     case ISD::STORE: {
16153       // Get alias information for Chain.
16154       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
16155           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
16156 
16157       // If chain is alias then stop here.
16158       if (!(IsLoad && IsOpLoad) &&
16159           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
16160         Aliases.push_back(Chain);
16161       } else {
16162         // Look further up the chain.
16163         Chains.push_back(Chain.getOperand(0));
16164         ++Depth;
16165       }
16166       break;
16167     }
16168 
16169     case ISD::TokenFactor:
16170       // We have to check each of the operands of the token factor for "small"
16171       // token factors, so we queue them up.  Adding the operands to the queue
16172       // (stack) in reverse order maintains the original order and increases the
16173       // likelihood that getNode will find a matching token factor (CSE.)
16174       if (Chain.getNumOperands() > 16) {
16175         Aliases.push_back(Chain);
16176         break;
16177       }
16178       for (unsigned n = Chain.getNumOperands(); n;)
16179         Chains.push_back(Chain.getOperand(--n));
16180       ++Depth;
16181       break;
16182 
16183     case ISD::CopyFromReg:
16184       // Forward past CopyFromReg.
16185       Chains.push_back(Chain.getOperand(0));
16186       ++Depth;
16187       break;
16188 
16189     default:
16190       // For all other instructions we will just have to take what we can get.
16191       Aliases.push_back(Chain);
16192       break;
16193     }
16194   }
16195 }
16196 
16197 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
16198 /// (aliasing node.)
16199 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
16200   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
16201 
16202   // Accumulate all the aliases to this node.
16203   GatherAllAliases(N, OldChain, Aliases);
16204 
16205   // If no operands then chain to entry token.
16206   if (Aliases.size() == 0)
16207     return DAG.getEntryNode();
16208 
16209   // If a single operand then chain to it.  We don't need to revisit it.
16210   if (Aliases.size() == 1)
16211     return Aliases[0];
16212 
16213   // Construct a custom tailored token factor.
16214   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
16215 }
16216 
16217 // This function tries to collect a bunch of potentially interesting
16218 // nodes to improve the chains of, all at once. This might seem
16219 // redundant, as this function gets called when visiting every store
16220 // node, so why not let the work be done on each store as it's visited?
16221 //
16222 // I believe this is mainly important because MergeConsecutiveStores
16223 // is unable to deal with merging stores of different sizes, so unless
16224 // we improve the chains of all the potential candidates up-front
16225 // before running MergeConsecutiveStores, it might only see some of
16226 // the nodes that will eventually be candidates, and then not be able
16227 // to go from a partially-merged state to the desired final
16228 // fully-merged state.
16229 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
16230   // This holds the base pointer, index, and the offset in bytes from the base
16231   // pointer.
16232   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
16233 
16234   // We must have a base and an offset.
16235   if (!BasePtr.Base.getNode())
16236     return false;
16237 
16238   // Do not handle stores to undef base pointers.
16239   if (BasePtr.Base.isUndef())
16240     return false;
16241 
16242   SmallVector<StoreSDNode *, 8> ChainedStores;
16243   ChainedStores.push_back(St);
16244 
16245   // Walk up the chain and look for nodes with offsets from the same
16246   // base pointer. Stop when reaching an instruction with a different kind
16247   // or instruction which has a different base pointer.
16248   StoreSDNode *Index = St;
16249   while (Index) {
16250     // If the chain has more than one use, then we can't reorder the mem ops.
16251     if (Index != St && !SDValue(Index, 0)->hasOneUse())
16252       break;
16253 
16254     if (Index->isVolatile() || Index->isIndexed())
16255       break;
16256 
16257     // Find the base pointer and offset for this memory node.
16258     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
16259 
16260     // Check that the base pointer is the same as the original one.
16261     if (!Ptr.equalBaseIndex(BasePtr))
16262       break;
16263 
16264     // Walk up the chain to find the next store node, ignoring any
16265     // intermediate loads. Any other kind of node will halt the loop.
16266     SDNode *NextInChain = Index->getChain().getNode();
16267     while (true) {
16268       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
16269         // We found a store node. Use it for the next iteration.
16270         if (STn->isVolatile() || STn->isIndexed()) {
16271           Index = nullptr;
16272           break;
16273         }
16274         ChainedStores.push_back(STn);
16275         Index = STn;
16276         break;
16277       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
16278         NextInChain = Ldn->getChain().getNode();
16279         continue;
16280       } else {
16281         Index = nullptr;
16282         break;
16283       }
16284     } // end while
16285   }
16286 
16287   // At this point, ChainedStores lists all of the Store nodes
16288   // reachable by iterating up through chain nodes matching the above
16289   // conditions.  For each such store identified, try to find an
16290   // earlier chain to attach the store to which won't violate the
16291   // required ordering.
16292   bool MadeChangeToSt = false;
16293   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
16294 
16295   for (StoreSDNode *ChainedStore : ChainedStores) {
16296     SDValue Chain = ChainedStore->getChain();
16297     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
16298 
16299     if (Chain != BetterChain) {
16300       if (ChainedStore == St)
16301         MadeChangeToSt = true;
16302       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
16303     }
16304   }
16305 
16306   // Do all replacements after finding the replacements to make to avoid making
16307   // the chains more complicated by introducing new TokenFactors.
16308   for (auto Replacement : BetterChains)
16309     replaceStoreChain(Replacement.first, Replacement.second);
16310 
16311   return MadeChangeToSt;
16312 }
16313 
16314 /// This is the entry point for the file.
16315 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
16316                            CodeGenOpt::Level OptLevel) {
16317   /// This is the main entry point to this class.
16318   DAGCombiner(*this, AA, OptLevel).Run(Level);
16319 }
16320