1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/MachineValueType.h"
40 #include "llvm/CodeGen/RuntimeLibcalls.h"
41 #include "llvm/CodeGen/SelectionDAG.h"
42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43 #include "llvm/CodeGen/SelectionDAGNodes.h"
44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetRegisterInfo.h"
47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
48 #include "llvm/CodeGen/ValueTypes.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/ErrorHandling.h"
62 #include "llvm/Support/KnownBits.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 #include <vector>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
// Statistics reported under -stats; each counter tracks how often a class of
// DAG-combiner transformation fired during compilation.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

// Command-line knobs controlling how aggressively the combiner reasons about
// memory aliasing and load splitting/slicing.

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
// Debug-build-only: restrict combiner alias analysis to a single function so
// miscompiles can be bisected down to one function.
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116   class DAGCombiner {
117     SelectionDAG &DAG;
118     const TargetLowering &TLI;
119     CombineLevel Level;
120     CodeGenOpt::Level OptLevel;
121     bool LegalOperations = false;
122     bool LegalTypes = false;
123     bool ForCodeSize;
124 
125     /// \brief Worklist of all of the nodes that need to be simplified.
126     ///
127     /// This must behave as a stack -- new nodes to process are pushed onto the
128     /// back and when processing we pop off of the back.
129     ///
130     /// The worklist will not contain duplicates but may contain null entries
131     /// due to nodes being deleted from the underlying DAG.
132     SmallVector<SDNode *, 64> Worklist;
133 
134     /// \brief Mapping from an SDNode to its position on the worklist.
135     ///
136     /// This is used to find and remove nodes from the worklist (by nulling
137     /// them) when they are deleted from the underlying DAG. It relies on
138     /// stable indices of nodes within the worklist.
139     DenseMap<SDNode *, unsigned> WorklistMap;
140 
141     /// \brief Set of nodes which have been combined (at least once).
142     ///
143     /// This is used to allow us to reliably add any operands of a DAG node
144     /// which have not yet been combined to the worklist.
145     SmallPtrSet<SDNode *, 32> CombinedNodes;
146 
147     // AA - Used for DAG load/store alias analysis.
148     AliasAnalysis *AA;
149 
150     /// When an instruction is simplified, add all users of the instruction to
151     /// the work lists because they might get more simplified now.
152     void AddUsersToWorklist(SDNode *N) {
153       for (SDNode *Node : N->uses())
154         AddToWorklist(Node);
155     }
156 
157     /// Call the node-specific routine that folds each particular type of node.
158     SDValue visit(SDNode *N);
159 
160   public:
161     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163           OptLevel(OL), AA(AA) {
164       ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165 
166       MaximumLegalStoreInBits = 0;
167       for (MVT VT : MVT::all_valuetypes())
168         if (EVT(VT).isSimple() && VT != MVT::Other &&
169             TLI.isTypeLegal(EVT(VT)) &&
170             VT.getSizeInBits() >= MaximumLegalStoreInBits)
171           MaximumLegalStoreInBits = VT.getSizeInBits();
172     }
173 
174     /// Add to the worklist making sure its instance is at the back (next to be
175     /// processed.)
176     void AddToWorklist(SDNode *N) {
177       assert(N->getOpcode() != ISD::DELETED_NODE &&
178              "Deleted Node added to Worklist");
179 
180       // Skip handle nodes as they can't usefully be combined and confuse the
181       // zero-use deletion strategy.
182       if (N->getOpcode() == ISD::HANDLENODE)
183         return;
184 
185       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186         Worklist.push_back(N);
187     }
188 
189     /// Remove all instances of N from the worklist.
190     void removeFromWorklist(SDNode *N) {
191       CombinedNodes.erase(N);
192 
193       auto It = WorklistMap.find(N);
194       if (It == WorklistMap.end())
195         return; // Not in the worklist.
196 
197       // Null out the entry rather than erasing it to avoid a linear operation.
198       Worklist[It->second] = nullptr;
199       WorklistMap.erase(It);
200     }
201 
202     void deleteAndRecombine(SDNode *N);
203     bool recursivelyDeleteUnusedNodes(SDNode *N);
204 
205     /// Replaces all uses of the results of one DAG node with new values.
206     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207                       bool AddTo = true);
208 
209     /// Replaces all uses of the results of one DAG node with new values.
210     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211       return CombineTo(N, &Res, 1, AddTo);
212     }
213 
214     /// Replaces all uses of the results of one DAG node with new values.
215     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216                       bool AddTo = true) {
217       SDValue To[] = { Res0, Res1 };
218       return CombineTo(N, To, 2, AddTo);
219     }
220 
221     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222 
223   private:
224     unsigned MaximumLegalStoreInBits;
225 
226     /// Check the specified integer node value to see if it can be simplified or
227     /// if things it uses can be simplified by bit propagation.
228     /// If so, return true.
229     bool SimplifyDemandedBits(SDValue Op) {
230       unsigned BitWidth = Op.getScalarValueSizeInBits();
231       APInt Demanded = APInt::getAllOnesValue(BitWidth);
232       return SimplifyDemandedBits(Op, Demanded);
233     }
234 
235     /// Check the specified vector node value to see if it can be simplified or
236     /// if things it uses can be simplified as it only uses some of the
237     /// elements. If so, return true.
238     bool SimplifyDemandedVectorElts(SDValue Op) {
239       unsigned NumElts = Op.getValueType().getVectorNumElements();
240       APInt Demanded = APInt::getAllOnesValue(NumElts);
241       return SimplifyDemandedVectorElts(Op, Demanded);
242     }
243 
244     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
245     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
246 
247     bool CombineToPreIndexedLoadStore(SDNode *N);
248     bool CombineToPostIndexedLoadStore(SDNode *N);
249     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
250     bool SliceUpLoad(SDNode *N);
251 
252     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
253     ///   load.
254     ///
255     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
256     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
257     /// \param EltNo index of the vector element to load.
258     /// \param OriginalLoad load that EVE came from to be replaced.
259     /// \returns EVE on success SDValue() on failure.
260     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
261         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
262     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
263     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
264     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
265     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
266     SDValue PromoteIntBinOp(SDValue Op);
267     SDValue PromoteIntShiftOp(SDValue Op);
268     SDValue PromoteExtend(SDValue Op);
269     bool PromoteLoad(SDValue Op);
270 
271     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
272                          SDValue OrigLoad, SDValue ExtLoad,
273                          const SDLoc &DL,
274                          ISD::NodeType ExtType);
275 
276     /// Call the node-specific routine that knows how to fold each
277     /// particular type of node. If that doesn't do anything, try the
278     /// target-specific DAG combines.
279     SDValue combine(SDNode *N);
280 
281     // Visitation implementation - Implement dag node combining for different
282     // node types.  The semantics are as follows:
283     // Return Value:
284     //   SDValue.getNode() == 0 - No change was made
285     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
286     //   otherwise              - N should be replaced by the returned Operand.
287     //
288     SDValue visitTokenFactor(SDNode *N);
289     SDValue visitMERGE_VALUES(SDNode *N);
290     SDValue visitADD(SDNode *N);
291     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
292     SDValue visitSUB(SDNode *N);
293     SDValue visitADDC(SDNode *N);
294     SDValue visitUADDO(SDNode *N);
295     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
296     SDValue visitSUBC(SDNode *N);
297     SDValue visitUSUBO(SDNode *N);
298     SDValue visitADDE(SDNode *N);
299     SDValue visitADDCARRY(SDNode *N);
300     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
301     SDValue visitSUBE(SDNode *N);
302     SDValue visitSUBCARRY(SDNode *N);
303     SDValue visitMUL(SDNode *N);
304     SDValue useDivRem(SDNode *N);
305     SDValue visitSDIV(SDNode *N);
306     SDValue visitUDIV(SDNode *N);
307     SDValue visitREM(SDNode *N);
308     SDValue visitMULHU(SDNode *N);
309     SDValue visitMULHS(SDNode *N);
310     SDValue visitSMUL_LOHI(SDNode *N);
311     SDValue visitUMUL_LOHI(SDNode *N);
312     SDValue visitSMULO(SDNode *N);
313     SDValue visitUMULO(SDNode *N);
314     SDValue visitIMINMAX(SDNode *N);
315     SDValue visitAND(SDNode *N);
316     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
317     SDValue visitOR(SDNode *N);
318     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
319     SDValue visitXOR(SDNode *N);
320     SDValue SimplifyVBinOp(SDNode *N);
321     SDValue visitSHL(SDNode *N);
322     SDValue visitSRA(SDNode *N);
323     SDValue visitSRL(SDNode *N);
324     SDValue visitRotate(SDNode *N);
325     SDValue visitABS(SDNode *N);
326     SDValue visitBSWAP(SDNode *N);
327     SDValue visitBITREVERSE(SDNode *N);
328     SDValue visitCTLZ(SDNode *N);
329     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
330     SDValue visitCTTZ(SDNode *N);
331     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
332     SDValue visitCTPOP(SDNode *N);
333     SDValue visitSELECT(SDNode *N);
334     SDValue visitVSELECT(SDNode *N);
335     SDValue visitSELECT_CC(SDNode *N);
336     SDValue visitSETCC(SDNode *N);
337     SDValue visitSETCCE(SDNode *N);
338     SDValue visitSETCCCARRY(SDNode *N);
339     SDValue visitSIGN_EXTEND(SDNode *N);
340     SDValue visitZERO_EXTEND(SDNode *N);
341     SDValue visitANY_EXTEND(SDNode *N);
342     SDValue visitAssertExt(SDNode *N);
343     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
344     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
345     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
346     SDValue visitTRUNCATE(SDNode *N);
347     SDValue visitBITCAST(SDNode *N);
348     SDValue visitBUILD_PAIR(SDNode *N);
349     SDValue visitFADD(SDNode *N);
350     SDValue visitFSUB(SDNode *N);
351     SDValue visitFMUL(SDNode *N);
352     SDValue visitFMA(SDNode *N);
353     SDValue visitFDIV(SDNode *N);
354     SDValue visitFREM(SDNode *N);
355     SDValue visitFSQRT(SDNode *N);
356     SDValue visitFCOPYSIGN(SDNode *N);
357     SDValue visitSINT_TO_FP(SDNode *N);
358     SDValue visitUINT_TO_FP(SDNode *N);
359     SDValue visitFP_TO_SINT(SDNode *N);
360     SDValue visitFP_TO_UINT(SDNode *N);
361     SDValue visitFP_ROUND(SDNode *N);
362     SDValue visitFP_ROUND_INREG(SDNode *N);
363     SDValue visitFP_EXTEND(SDNode *N);
364     SDValue visitFNEG(SDNode *N);
365     SDValue visitFABS(SDNode *N);
366     SDValue visitFCEIL(SDNode *N);
367     SDValue visitFTRUNC(SDNode *N);
368     SDValue visitFFLOOR(SDNode *N);
369     SDValue visitFMINNUM(SDNode *N);
370     SDValue visitFMAXNUM(SDNode *N);
371     SDValue visitBRCOND(SDNode *N);
372     SDValue visitBR_CC(SDNode *N);
373     SDValue visitLOAD(SDNode *N);
374 
375     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
376     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
377 
378     SDValue visitSTORE(SDNode *N);
379     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
380     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
381     SDValue visitBUILD_VECTOR(SDNode *N);
382     SDValue visitCONCAT_VECTORS(SDNode *N);
383     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
384     SDValue visitVECTOR_SHUFFLE(SDNode *N);
385     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
386     SDValue visitINSERT_SUBVECTOR(SDNode *N);
387     SDValue visitMLOAD(SDNode *N);
388     SDValue visitMSTORE(SDNode *N);
389     SDValue visitMGATHER(SDNode *N);
390     SDValue visitMSCATTER(SDNode *N);
391     SDValue visitFP_TO_FP16(SDNode *N);
392     SDValue visitFP16_TO_FP(SDNode *N);
393 
394     SDValue visitFADDForFMACombine(SDNode *N);
395     SDValue visitFSUBForFMACombine(SDNode *N);
396     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
397 
398     SDValue XformToShuffleWithZero(SDNode *N);
399     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
400                            SDValue RHS);
401 
402     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
403 
404     SDValue foldSelectOfConstants(SDNode *N);
405     SDValue foldVSelectOfConstants(SDNode *N);
406     SDValue foldBinOpIntoSelect(SDNode *BO);
407     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
408     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
409     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
410     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
411                              SDValue N2, SDValue N3, ISD::CondCode CC,
412                              bool NotExtCompare = false);
413     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
414                                    SDValue N2, SDValue N3, ISD::CondCode CC);
415     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
416                               const SDLoc &DL);
417     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
418                           const SDLoc &DL, bool foldBooleans);
419     SDValue rebuildSetCC(SDValue N);
420 
421     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
422                            SDValue &CC) const;
423     bool isOneUseSetCC(SDValue N) const;
424 
425     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
426                                          unsigned HiOp);
427     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
428     SDValue CombineExtLoad(SDNode *N);
429     SDValue combineRepeatedFPDivisors(SDNode *N);
430     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
431     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
432     SDValue BuildSDIV(SDNode *N);
433     SDValue BuildSDIVPow2(SDNode *N);
434     SDValue BuildUDIV(SDNode *N);
435     SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
436     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
437     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
438     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
439     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
440     SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
441                                 SDNodeFlags Flags, bool Reciprocal);
442     SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
443                                 SDNodeFlags Flags, bool Reciprocal);
444     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
445                                bool DemandHighBits = true);
446     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
447     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
448                               SDValue InnerPos, SDValue InnerNeg,
449                               unsigned PosOpcode, unsigned NegOpcode,
450                               const SDLoc &DL);
451     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
452     SDValue MatchLoadCombine(SDNode *N);
453     SDValue ReduceLoadWidth(SDNode *N);
454     SDValue ReduceLoadOpStoreWidth(SDNode *N);
455     SDValue splitMergedValStore(StoreSDNode *ST);
456     SDValue TransformFPLoadStorePair(SDNode *N);
457     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
458     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
459     SDValue reduceBuildVecToShuffle(SDNode *N);
460     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
461                                   ArrayRef<int> VectorMask, SDValue VecIn1,
462                                   SDValue VecIn2, unsigned LeftIdx);
463     SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
464 
465     /// Walk up chain skipping non-aliasing memory nodes,
466     /// looking for aliasing nodes and adding them to the Aliases vector.
467     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
468                           SmallVectorImpl<SDValue> &Aliases);
469 
470     /// Return true if there is any possibility that the two addresses overlap.
471     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
472 
473     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
474     /// chain (aliasing node.)
475     SDValue FindBetterChain(SDNode *N, SDValue Chain);
476 
477     /// Try to replace a store and any possibly adjacent stores on
478     /// consecutive chains with better chains. Return true only if St is
479     /// replaced.
480     ///
481     /// Notice that other chains may still be replaced even if the function
482     /// returns false.
483     bool findBetterNeighborChains(StoreSDNode *St);
484 
485     /// Match "(X shl/srl V1) & V2" where V2 may not be present.
486     bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
487 
488     /// Holds a pointer to an LSBaseSDNode as well as information on where it
489     /// is located in a sequence of memory operations connected by a chain.
490     struct MemOpLink {
491       // Ptr to the mem node.
492       LSBaseSDNode *MemNode;
493 
494       // Offset from the base ptr.
495       int64_t OffsetFromBase;
496 
497       MemOpLink(LSBaseSDNode *N, int64_t Offset)
498           : MemNode(N), OffsetFromBase(Offset) {}
499     };
500 
501     /// This is a helper function for visitMUL to check the profitability
502     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
503     /// MulNode is the original multiply, AddNode is (add x, c1),
504     /// and ConstNode is c2.
505     bool isMulAddWithConstProfitable(SDNode *MulNode,
506                                      SDValue &AddNode,
507                                      SDValue &ConstNode);
508 
509     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
510     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
511     /// the type of the loaded value to be extended.
512     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
513                           EVT LoadResultTy, EVT &ExtVT);
514 
515     /// Helper function to calculate whether the given Load can have its
516     /// width reduced to ExtVT.
517     bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
518                            EVT &ExtVT, unsigned ShAmt = 0);
519 
520     /// Used by BackwardsPropagateMask to find suitable loads.
521     bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
522                            SmallPtrSetImpl<SDNode*> &NodeWithConsts,
523                            ConstantSDNode *Mask, SDNode *&UncombinedNode);
524     /// Attempt to propagate a given AND node back to load leaves so that they
525     /// can be combined into narrow loads.
526     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
527 
528     /// Helper function for MergeConsecutiveStores which merges the
529     /// component store chains.
530     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
531                                 unsigned NumStores);
532 
533     /// This is a helper function for MergeConsecutiveStores. When the
534     /// source elements of the consecutive stores are all constants or
535     /// all extracted vector elements, try to merge them into one
536     /// larger store introducing bitcasts if necessary.  \return True
537     /// if a merged store was created.
538     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
539                                          EVT MemVT, unsigned NumStores,
540                                          bool IsConstantSrc, bool UseVector,
541                                          bool UseTrunc);
542 
543     /// This is a helper function for MergeConsecutiveStores. Stores
544     /// that potentially may be merged with St are placed in
545     /// StoreNodes.
546     void getStoreMergeCandidates(StoreSDNode *St,
547                                  SmallVectorImpl<MemOpLink> &StoreNodes);
548 
549     /// Helper function for MergeConsecutiveStores. Checks if
550     /// candidate stores have indirect dependency through their
551     /// operands. \return True if safe to merge.
552     bool checkMergeStoreCandidatesForDependencies(
553         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
554 
555     /// Merge consecutive store operations into a wide store.
556     /// This optimization uses wide integers or vectors when possible.
557     /// \return number of stores that were merged into a merged store (the
558     /// affected nodes are stored as a prefix in \p StoreNodes).
559     bool MergeConsecutiveStores(StoreSDNode *N);
560 
561     /// \brief Try to transform a truncation where C is a constant:
562     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
563     ///
564     /// \p N needs to be a truncation and its first operand an AND. Other
565     /// requirements are checked by the function (e.g. that trunc is
566     /// single-use) and if missed an empty SDValue is returned.
567     SDValue distributeTruncateThroughAnd(SDNode *N);
568 
569   public:
570     /// Runs the dag combiner on all nodes in the work list
571     void Run(CombineLevel AtLevel);
572 
573     SelectionDAG &getDAG() const { return DAG; }
574 
575     /// Returns a type large enough to hold any valid shift amount - before type
576     /// legalization these can be huge.
577     EVT getShiftAmountTy(EVT LHSTy) {
578       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
579       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
580     }
581 
582     /// This method returns true if we are running before type legalization or
583     /// if the specified VT is legal.
584     bool isTypeLegal(const EVT &VT) {
585       if (!LegalTypes) return true;
586       return TLI.isTypeLegal(VT);
587     }
588 
589     /// Convenience wrapper around TargetLowering::getSetCCResultType
590     EVT getSetCCResultType(EVT VT) const {
591       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
592     }
593   };
594 
595 /// This class is a DAGUpdateListener that removes any deleted
596 /// nodes from the worklist.
597 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
598   DAGCombiner &DC;
599 
600 public:
601   explicit WorklistRemover(DAGCombiner &dc)
602     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
603 
604   void NodeDeleted(SDNode *N, SDNode *E) override {
605     DC.removeFromWorklist(N);
606   }
607 };
608 
609 } // end anonymous namespace
610 
611 //===----------------------------------------------------------------------===//
612 //  TargetLowering::DAGCombinerInfo implementation
613 //===----------------------------------------------------------------------===//
614 
615 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
616   ((DAGCombiner*)DC)->AddToWorklist(N);
617 }
618 
619 SDValue TargetLowering::DAGCombinerInfo::
620 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
621   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
622 }
623 
624 SDValue TargetLowering::DAGCombinerInfo::
625 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
626   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
627 }
628 
629 SDValue TargetLowering::DAGCombinerInfo::
630 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
631   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
632 }
633 
634 void TargetLowering::DAGCombinerInfo::
635 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
636   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
637 }
638 
639 //===----------------------------------------------------------------------===//
640 // Helper Functions
641 //===----------------------------------------------------------------------===//
642 
643 void DAGCombiner::deleteAndRecombine(SDNode *N) {
644   removeFromWorklist(N);
645 
646   // If the operands of this node are only used by the node, they will now be
647   // dead. Make sure to re-visit them and recursively delete dead nodes.
648   for (const SDValue &Op : N->ops())
649     // For an operand generating multiple values, one of the values may
650     // become dead allowing further simplification (e.g. split index
651     // arithmetic from an indexed load).
652     if (Op->hasOneUse() || Op->getNumValues() > 1)
653       AddToWorklist(Op.getNode());
654 
655   DAG.DeleteNode(N);
656 }
657 
658 /// Return 1 if we can compute the negated form of the specified expression for
659 /// the same cost as the expression itself, or 2 if we can compute the negated
660 /// form more cheaply than the expression itself.
661 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
662                                const TargetLowering &TLI,
663                                const TargetOptions *Options,
664                                unsigned Depth = 0) {
665   // fneg is removable even if it has multiple uses.
666   if (Op.getOpcode() == ISD::FNEG) return 2;
667 
668   // Don't allow anything with multiple uses.
669   if (!Op.hasOneUse()) return 0;
670 
671   // Don't recurse exponentially.
672   if (Depth > 6) return 0;
673 
674   switch (Op.getOpcode()) {
675   default: return false;
676   case ISD::ConstantFP: {
677     if (!LegalOperations)
678       return 1;
679 
680     // Don't invert constant FP values after legalization unless the target says
681     // the negated constant is legal.
682     EVT VT = Op.getValueType();
683     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
684       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
685   }
686   case ISD::FADD:
687     // FIXME: determine better conditions for this xform.
688     if (!Options->UnsafeFPMath) return 0;
689 
690     // After operation legalization, it might not be legal to create new FSUBs.
691     if (LegalOperations &&
692         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
693       return 0;
694 
695     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
696     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
697                                     Options, Depth + 1))
698       return V;
699     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
700     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
701                               Depth + 1);
702   case ISD::FSUB:
703     // We can't turn -(A-B) into B-A when we honor signed zeros.
704     if (!Options->NoSignedZerosFPMath &&
705         !Op.getNode()->getFlags().hasNoSignedZeros())
706       return 0;
707 
708     // fold (fneg (fsub A, B)) -> (fsub B, A)
709     return 1;
710 
711   case ISD::FMUL:
712   case ISD::FDIV:
713     if (Options->HonorSignDependentRoundingFPMath()) return 0;
714 
715     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
716     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
717                                     Options, Depth + 1))
718       return V;
719 
720     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
721                               Depth + 1);
722 
723   case ISD::FP_EXTEND:
724   case ISD::FP_ROUND:
725   case ISD::FSIN:
726     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
727                               Depth + 1);
728   }
729 }
730 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// This must mirror isNegatibleForFree case-for-case: the asserts below
/// enforce (rather than re-check) the conditions that routine already
/// verified, including its recursion depth limit.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Preserve the FP fast-math flags of the original node on any node we build.
  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly by flipping its sign.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    // Prefer negating whichever operand isNegatibleForFree says is free.
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // These operations commute with negation, so push the fneg down into
    // their (single) operand.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND carries an extra operand that must be preserved on the
      // rebuilt node.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
808 
809 // APInts must be the same size for most operations, this helper
810 // function zero extends the shorter of the pair so that they match.
811 // We provide an Offset so that we can create bitwidths that won't overflow.
812 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
813   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
814   LHS = LHS.zextOrSelf(Bits);
815   RHS = RHS.zextOrSelf(Bits);
816 }
817 
818 // Return true if this node is a setcc, or is a select_cc
819 // that selects between the target values used for true and false, making it
820 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
821 // the appropriate nodes based on the type of node we are checking. This
822 // simplifies life a bit for the callers.
823 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
824                                     SDValue &CC) const {
825   if (N.getOpcode() == ISD::SETCC) {
826     LHS = N.getOperand(0);
827     RHS = N.getOperand(1);
828     CC  = N.getOperand(2);
829     return true;
830   }
831 
832   if (N.getOpcode() != ISD::SELECT_CC ||
833       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
834       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
835     return false;
836 
837   if (TLI.getBooleanContents(N.getValueType()) ==
838       TargetLowering::UndefinedBooleanContent)
839     return false;
840 
841   LHS = N.getOperand(0);
842   RHS = N.getOperand(1);
843   CC  = N.getOperand(4);
844   return true;
845 }
846 
847 /// Return true if this is a SetCC-equivalent operation with only one use.
848 /// If this is true, it allows the users to invert the operation for free when
849 /// it is profitable to do so.
850 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
851   SDValue N0, N1, N2;
852   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
853     return true;
854   return false;
855 }
856 
857 // \brief Returns the SDNode if it is a constant float BuildVector
858 // or constant float.
859 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
860   if (isa<ConstantFPSDNode>(N))
861     return N.getNode();
862   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
863     return N.getNode();
864   return nullptr;
865 }
866 
867 // Determines if it is a constant integer or a build vector of constant
868 // integers (and undefs).
869 // Do not permit build vector implicit truncation.
870 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
871   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
872     return !(Const->isOpaque() && NoOpaques);
873   if (N.getOpcode() != ISD::BUILD_VECTOR)
874     return false;
875   unsigned BitWidth = N.getScalarValueSizeInBits();
876   for (const SDValue &Op : N->op_values()) {
877     if (Op.isUndef())
878       continue;
879     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
880     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
881         (Const->isOpaque() && NoOpaques))
882       return false;
883   }
884   return true;
885 }
886 
887 // Determines if it is a constant null integer or a splatted vector of a
888 // constant null integer (with no undefs).
889 // Build vector implicit truncation is not an issue for null values.
890 static bool isNullConstantOrNullSplatConstant(SDValue N) {
891   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
892     return Splat->isNullValue();
893   return false;
894 }
895 
896 // Determines if it is a constant integer of one or a splatted vector of a
897 // constant integer of one (with no undefs).
898 // Do not permit build vector implicit truncation.
899 static bool isOneConstantOrOneSplatConstant(SDValue N) {
900   unsigned BitWidth = N.getScalarValueSizeInBits();
901   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
902     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
903   return false;
904 }
905 
906 // Determines if it is a constant integer of all ones or a splatted vector of a
907 // constant integer of all ones (with no undefs).
908 // Do not permit build vector implicit truncation.
909 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
910   unsigned BitWidth = N.getScalarValueSizeInBits();
911   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
912     return Splat->isAllOnesValue() &&
913            Splat->getAPIntValue().getBitWidth() == BitWidth;
914   return false;
915 }
916 
917 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
918 // undef's.
919 static bool isAnyConstantBuildVector(const SDNode *N) {
920   return ISD::isBuildVectorOfConstantSDNodes(N) ||
921          ISD::isBuildVectorOfConstantFPSDNodes(N);
922 }
923 
/// Reassociate (Opc N0, N1) so that constants end up grouped where they can
/// be constant-folded, or hoisted to the outer operation.  Returns the
/// reassociated value, or a null SDValue when no transform applies.  Assumes
/// Opc is associative/commutative for the patterns below.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  // Case 1: the inner operation is on the left: (op (op x, c1), y).
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both were constants but didn't fold; don't build a new form.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Case 2: mirror image, inner operation on the right: (op x, (op y, c1)).
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
969 
/// Replace all NumTo result values of \p N with the corresponding entries of
/// \p To.  When \p AddTo is set, the replacement nodes and their users are
/// pushed onto the worklist.  Deletes N if it ends up dead, and returns
/// SDValue(N, 0) as the conventional "this node was combined" result.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  // Null entries in To are tolerated; typed entries must match the result
  // type they replace.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // The RAII remover drops any nodes deleted during RAUW from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1003 
/// Commit a replacement recorded by a TargetLoweringOpt (TLO.Old -> TLO.New):
/// rewrite all uses, update the worklist, and delete TLO.Old if it died.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1021 
1022 /// Check the specified integer node value to see if it can be simplified or if
1023 /// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  // TLO records any Old->New replacement the target simplification performs.
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1044 
1045 /// Check the specified vector node value to see if it can be simplified or
1046 /// if things it uses can be simplified as it only uses some of the elements.
1047 /// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &Demanded) {
  // TLO records any Old->New replacement the target simplification performs.
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  // Per-lane results from the target; unused here beyond driving the query.
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1066 
/// Rewire users of \p Load to the promoted (wider) load \p ExtLoad: the value
/// result is replaced by a truncate of ExtLoad back to the original type, and
/// the chain result is redirected to ExtLoad's chain.  Deletes Load.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Value uses see the truncated wide load; chain uses follow the new load.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1083 
/// Promote \p Op to the wider type \p PVT.  \p Replace is set to true when a
/// new load is created, telling the caller that the original load's value and
/// chain uses must be rewired (see ReplaceLoadWithPromotedLoad).  Returns a
/// null SDValue if promotion isn't possible.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // For a plain (non-extending) load, prefer ZEXTLOAD when legal at the
    // promoted type, falling back to EXTLOAD; an extending load keeps its
    // existing extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Promote the asserted value and re-issue the assertion at the wide type.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // NOTE(review): byte-sized constants are sign-extended and others
    // zero-extended; presumably this matches how such constants were
    // materialized upstream — confirm before relying on it.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Everything else is promoted via ANY_EXTEND when that is legal.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1122 
/// Promote \p Op to \p PVT and then sign-extend in-register from the original
/// narrow type, so the wide value's high bits replicate the narrow sign bit.
/// Returns a null SDValue on failure.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // A promoted load needs its old value/chain uses rewired.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1139 
/// Promote \p Op to \p PVT and then clear the bits above the original narrow
/// type (zero-extend in-register).  Returns a null SDValue on failure.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // A promoted load needs its old value/chain uses rewired.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1153 
1154 /// Promote the specified integer binary operation if the target indicates it is
1155 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1156 /// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Promote both operands to the wider type.
    // NOTE(review): PromoteOperand can return a null SDValue; presumably
    // IsDesirableToPromoteOp guarantees promotable operands here — confirm.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation wide, then truncate back to the original type.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    // Rewire remaining uses of the original (now replaced) loads.
    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    // Returning Op signals "combined via CombineTo"; see the Run loop.
    return Op;
  }
  return SDValue();
}
1218 
1219 /// Promote the specified integer shift operation if the target indicates it is
1220 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1221 /// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Only the shifted value is promoted; the shift amount keeps its type.
    // SRA needs the widened high bits to replicate the sign bit and SRL needs
    // them zero, so use the matching in-register extension for those.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift wide, then truncate back to the original type.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1271 
1272 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1273   if (!LegalOperations)
1274     return SDValue();
1275 
1276   EVT VT = Op.getValueType();
1277   if (VT.isVector() || !VT.isInteger())
1278     return SDValue();
1279 
1280   // If operation type is 'undesirable', e.g. i16 on x86, consider
1281   // promoting it.
1282   unsigned Opc = Op.getOpcode();
1283   if (TLI.isTypeDesirableForOp(Opc, VT))
1284     return SDValue();
1285 
1286   EVT PVT = VT;
1287   // Consult target whether it is a good idea to promote this operation and
1288   // what's the right type to promote it to.
1289   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1290     assert(PVT != VT && "Don't know what type to promote to!");
1291     // fold (aext (aext x)) -> (aext x)
1292     // fold (aext (zext x)) -> (zext x)
1293     // fold (aext (sext x)) -> (sext x)
1294     DEBUG(dbgs() << "\nPromoting ";
1295           Op.getNode()->dump(&DAG));
1296     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1297   }
1298   return SDValue();
1299 }
1300 
/// Promote an unindexed scalar-integer load to a wider extending load when
/// the target finds the original type undesirable.  Returns true if the load
/// was replaced (by a truncate of the new extending load).
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Mirror PromoteOperand's load handling: plain loads prefer ZEXTLOAD
    // when legal (else EXTLOAD); extending loads keep their extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Value uses get the truncated result; chain uses follow the new load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1351 
1352 /// \brief Recursively delete a node which has no uses and any operands for
1353 /// which it is the only use.
1354 ///
1355 /// Note that this both deletes the nodes and removes them from the worklist.
1356 /// It also adds any nodes who have had a user deleted to the worklist as they
1357 /// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // Worklist of deletion candidates; SmallSetVector both uniques entries and
  // gives a deterministic visitation order.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue its operands (they may become dead too), then delete it.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still alive, but it just lost a user: revisit it for new combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1381 
1382 //===----------------------------------------------------------------------===//
1383 //  Main DAG Combiner implementation
1384 //===----------------------------------------------------------------------===//
1385 
/// Main driver: repeatedly pop nodes off the worklist, (re-)legalize them if
/// needed, try to combine each one, and propagate the results until the
/// worklist is empty; then prune any remaining dead nodes.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // N was RAUW'd/deleted by legalization; nothing left to combine here.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Replace N's values with RV's; when counts differ, the combine may only
    // return a single-result replacement for a single-result node.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1490 
/// Dispatch \p N to the opcode-specific combine routine.
///
/// Returns the replacement value produced by the matching visitor, or a null
/// SDValue when no combine applies (opcodes without a case fall through to
/// the default and return null). Some cases intentionally share a visitor
/// (e.g. SREM/UREM, the integer min/max opcodes, and the rotates).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1597 
1598 SDValue DAGCombiner::combine(SDNode *N) {
1599   SDValue RV = visit(N);
1600 
1601   // If nothing happened, try a target-specific DAG combine.
1602   if (!RV.getNode()) {
1603     assert(N->getOpcode() != ISD::DELETED_NODE &&
1604            "Node was deleted but visit returned NULL!");
1605 
1606     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1607         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1608 
1609       // Expose the DAG combiner to the target combiner impls.
1610       TargetLowering::DAGCombinerInfo
1611         DagCombineInfo(DAG, Level, false, this);
1612 
1613       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1614     }
1615   }
1616 
1617   // If nothing happened still, try promoting the operation.
1618   if (!RV.getNode()) {
1619     switch (N->getOpcode()) {
1620     default: break;
1621     case ISD::ADD:
1622     case ISD::SUB:
1623     case ISD::MUL:
1624     case ISD::AND:
1625     case ISD::OR:
1626     case ISD::XOR:
1627       RV = PromoteIntBinOp(SDValue(N, 0));
1628       break;
1629     case ISD::SHL:
1630     case ISD::SRA:
1631     case ISD::SRL:
1632       RV = PromoteIntShiftOp(SDValue(N, 0));
1633       break;
1634     case ISD::SIGN_EXTEND:
1635     case ISD::ZERO_EXTEND:
1636     case ISD::ANY_EXTEND:
1637       RV = PromoteExtend(SDValue(N, 0));
1638       break;
1639     case ISD::LOAD:
1640       if (PromoteLoad(SDValue(N, 0)))
1641         RV = SDValue(N, 0);
1642       break;
1643     }
1644   }
1645 
1646   // If N is a commutative binary node, try eliminate it if the commuted
1647   // version is already present in the DAG.
1648   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1649       N->getNumValues() == 1) {
1650     SDValue N0 = N->getOperand(0);
1651     SDValue N1 = N->getOperand(1);
1652 
1653     // Constant operands are canonicalized to RHS.
1654     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1655       SDValue Ops[] = {N1, N0};
1656       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1657                                             N->getFlags());
1658       if (CSENode)
1659         return SDValue(CSENode, 0);
1660     }
1661   }
1662 
1663   return RV;
1664 }
1665 
1666 /// Given a node, return its input chain if it has one, otherwise return a null
1667 /// sd operand.
1668 static SDValue getInputChainForNode(SDNode *N) {
1669   if (unsigned NumOps = N->getNumOperands()) {
1670     if (N->getOperand(0).getValueType() == MVT::Other)
1671       return N->getOperand(0);
1672     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1673       return N->getOperand(NumOps-1);
1674     for (unsigned i = 1; i < NumOps-1; ++i)
1675       if (N->getOperand(i).getValueType() == MVT::Other)
1676         return N->getOperand(i);
1677   }
1678   return SDValue();
1679 }
1680 
/// Combine a TokenFactor node.
///
/// Drops redundant chain operands (entry tokens, duplicates, and operands
/// that are already reachable through another operand's chain) and merges
/// single-use TokenFactor operands into this node. Returns the replacement
/// chain, or null if no simplification was made.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can
  // exit early if we find all remaining work is associated with just one
  // operand as no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Queue Op's chain predecessor for the breadth-first walk; if Op is itself
  // one of the original operands, record that it is subsumed by the operand
  // whose search reached it (identified by OpNumber).
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Cap the walk at 1024 worklist entries to bound compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to be considering at least 2 Ops for pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate
      // without hitting another operand's search. Prevent us from marking
      // this operand considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Any memory operation continues the walk through its chain operand.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not subsumed by another chain.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1838 
/// MERGE_VALUES can always be eliminated: each result value i is simply the
/// node's operand i, so all uses are rewritten to the operands directly.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  // Removes nodes from the combiner worklist as they die during replacement.
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Forward each result value to the corresponding operand.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}
1855 
1856 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1857 /// ConstantSDNode pointer else nullptr.
1858 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1859   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1860   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1861 }
1862 
1863 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1864   auto BinOpcode = BO->getOpcode();
1865   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1866           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1867           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1868           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1869           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1870           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1871           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1872           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1873           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1874          "Unexpected binary operator");
1875 
1876   // Bail out if any constants are opaque because we can't constant fold those.
1877   SDValue C1 = BO->getOperand(1);
1878   if (!isConstantOrConstantVector(C1, true) &&
1879       !isConstantFPBuildVectorOrConstantFP(C1))
1880     return SDValue();
1881 
1882   // Don't do this unless the old select is going away. We want to eliminate the
1883   // binary operator, not replace a binop with a select.
1884   // TODO: Handle ISD::SELECT_CC.
1885   SDValue Sel = BO->getOperand(0);
1886   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1887     return SDValue();
1888 
1889   SDValue CT = Sel.getOperand(1);
1890   if (!isConstantOrConstantVector(CT, true) &&
1891       !isConstantFPBuildVectorOrConstantFP(CT))
1892     return SDValue();
1893 
1894   SDValue CF = Sel.getOperand(2);
1895   if (!isConstantOrConstantVector(CF, true) &&
1896       !isConstantFPBuildVectorOrConstantFP(CF))
1897     return SDValue();
1898 
1899   // We have a select-of-constants followed by a binary operator with a
1900   // constant. Eliminate the binop by pulling the constant math into the select.
1901   // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1902   EVT VT = Sel.getValueType();
1903   SDLoc DL(Sel);
1904   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1905   if (!NewCT.isUndef() &&
1906       !isConstantOrConstantVector(NewCT, true) &&
1907       !isConstantFPBuildVectorOrConstantFP(NewCT))
1908     return SDValue();
1909 
1910   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1911   if (!NewCF.isUndef() &&
1912       !isConstantOrConstantVector(NewCF, true) &&
1913       !isConstantFPBuildVectorOrConstantFP(NewCF))
1914     return SDValue();
1915 
1916   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1917 }
1918 
/// Combine an ISD::ADD node.
///
/// Tries, roughly in order: vector-specific simplifications, undef
/// propagation, constant folding and canonicalization (constants to RHS),
/// algebraic identities involving SUB, reassociation, demanded-bits
/// simplification, the add->or transform when the operands share no set
/// bits, and finally the shared add-like folds (visitADDLike) with the
/// operands in both orders.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // If the demanded-bits analysis simplified an operand, N was updated in
  // place; return it so it gets revisited.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Try the shared add-like folds with the operands in both orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2066 
2067 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2068   bool Masked = false;
2069 
2070   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2071   while (true) {
2072     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2073       V = V.getOperand(0);
2074       continue;
2075     }
2076 
2077     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2078       Masked = true;
2079       V = V.getOperand(0);
2080       continue;
2081     }
2082 
2083     break;
2084   }
2085 
2086   // If this is not a carry, return.
2087   if (V.getResNo() != 1)
2088     return SDValue();
2089 
2090   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2091       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2092     return SDValue();
2093 
2094   // If the result is masked, then no matter what kind of bool it is we can
2095   // return. If it isn't, then we need to make sure the bool type is either 0 or
2096   // 1 and not other values.
2097   if (Masked ||
2098       TLI.getBooleanContents(V.getValueType()) ==
2099           TargetLoweringBase::ZeroOrOneBooleanContent)
2100     return V;
2101 
2102   return SDValue();
2103 }
2104 
/// Add-like folds shared by visitADD, tried with the operands in either
/// order (the caller invokes this twice, swapping N0/N1). \p LocReference
/// is the original add node and supplies the debug location for new nodes.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-ones or all-zeros.
    if (NumSignBits == DestBits &&
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2162 
2163 SDValue DAGCombiner::visitADDC(SDNode *N) {
2164   SDValue N0 = N->getOperand(0);
2165   SDValue N1 = N->getOperand(1);
2166   EVT VT = N0.getValueType();
2167   SDLoc DL(N);
2168 
2169   // If the flag result is dead, turn this into an ADD.
2170   if (!N->hasAnyUseOfValue(1))
2171     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2172                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2173 
2174   // canonicalize constant to RHS.
2175   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2176   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2177   if (N0C && !N1C)
2178     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2179 
2180   // fold (addc x, 0) -> x + no carry out
2181   if (isNullConstant(N1))
2182     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2183                                         DL, MVT::Glue));
2184 
2185   // If it cannot overflow, transform into an add.
2186   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2187     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2188                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2189 
2190   return SDValue();
2191 }
2192 
2193 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2194   SDValue N0 = N->getOperand(0);
2195   SDValue N1 = N->getOperand(1);
2196   EVT VT = N0.getValueType();
2197   if (VT.isVector())
2198     return SDValue();
2199 
2200   EVT CarryVT = N->getValueType(1);
2201   SDLoc DL(N);
2202 
2203   // If the flag result is dead, turn this into an ADD.
2204   if (!N->hasAnyUseOfValue(1))
2205     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2206                      DAG.getUNDEF(CarryVT));
2207 
2208   // canonicalize constant to RHS.
2209   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2210   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2211   if (N0C && !N1C)
2212     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2213 
2214   // fold (uaddo x, 0) -> x + no carry out
2215   if (isNullConstant(N1))
2216     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2217 
2218   // If it cannot overflow, transform into an add.
2219   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2220     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2221                      DAG.getConstant(0, DL, CarryVT));
2222 
2223   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2224     return Combined;
2225 
2226   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2227     return Combined;
2228 
2229   return SDValue();
2230 }
2231 
2232 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2233   auto VT = N0.getValueType();
2234 
2235   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2236   // If Y + 1 cannot overflow.
2237   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2238     SDValue Y = N1.getOperand(0);
2239     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2240     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2241       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2242                          N1.getOperand(2));
2243   }
2244 
2245   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2246   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2247     if (SDValue Carry = getAsCarry(TLI, N1))
2248       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2249                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2250 
2251   return SDValue();
2252 }
2253 
2254 SDValue DAGCombiner::visitADDE(SDNode *N) {
2255   SDValue N0 = N->getOperand(0);
2256   SDValue N1 = N->getOperand(1);
2257   SDValue CarryIn = N->getOperand(2);
2258 
2259   // canonicalize constant to RHS
2260   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2261   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2262   if (N0C && !N1C)
2263     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2264                        N1, N0, CarryIn);
2265 
2266   // fold (adde x, y, false) -> (addc x, y)
2267   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2268     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2269 
2270   return SDValue();
2271 }
2272 
// Combine an ADDCARRY node (add with boolean carry-in and carry-out).
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn))
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    // Bring the carry to the result type, then mask to bit 0 so the value is
    // exactly 0 or 1. The carry-out is known zero since 0 + 0 cannot carry.
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // Try the remaining patterns with the addends in both orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2308 
2309 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2310                                        SDNode *N) {
2311   // Iff the flag result is dead:
2312   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2313   if ((N0.getOpcode() == ISD::ADD ||
2314        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2315       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2316     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2317                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2318 
2319   /**
2320    * When one of the addcarry argument is itself a carry, we may be facing
2321    * a diamond carry propagation. In which case we try to transform the DAG
2322    * to ensure linear carry propagation if that is possible.
2323    *
2324    * We are trying to get:
2325    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2326    */
2327   if (auto Y = getAsCarry(TLI, N1)) {
2328     /**
2329      *            (uaddo A, B)
2330      *             /       \
2331      *          Carry      Sum
2332      *            |          \
2333      *            | (addcarry *, 0, Z)
2334      *            |       /
2335      *             \   Carry
2336      *              |   /
2337      * (addcarry X, *, *)
2338      */
2339     if (Y.getOpcode() == ISD::UADDO &&
2340         CarryIn.getResNo() == 1 &&
2341         CarryIn.getOpcode() == ISD::ADDCARRY &&
2342         isNullConstant(CarryIn.getOperand(1)) &&
2343         CarryIn.getOperand(0) == Y.getValue(0)) {
2344       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2345                               Y.getOperand(0), Y.getOperand(1),
2346                               CarryIn.getOperand(2));
2347       AddToWorklist(NewY.getNode());
2348       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2349                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2350                          NewY.getValue(1));
2351     }
2352   }
2353 
2354   return SDValue();
2355 }
2356 
2357 // Since it may not be valid to emit a fold to zero for vector initializers
2358 // check if we can before folding.
2359 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2360                              SelectionDAG &DAG, bool LegalOperations,
2361                              bool LegalTypes) {
2362   if (!VT.isVector())
2363     return DAG.getConstant(0, DL, VT);
2364   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2365     return DAG.getConstant(0, DL, VT);
2366   return SDValue();
2367 }
2368 
// Combine a SUB node: constant folding, algebraic identities, and
// canonicalizations (e.g. sub-of-constant becomes add-of-negated-constant).
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // Folds for negation: 0 - X (scalar or splat zero LHS).
  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // An i1 sext-in-reg produces 0 or -1, so subtracting it is the same as
  // adding the zero-extended i1 value (0 or 1).
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
2516 
2517 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2518   SDValue N0 = N->getOperand(0);
2519   SDValue N1 = N->getOperand(1);
2520   EVT VT = N0.getValueType();
2521   SDLoc DL(N);
2522 
2523   // If the flag result is dead, turn this into an SUB.
2524   if (!N->hasAnyUseOfValue(1))
2525     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2526                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2527 
2528   // fold (subc x, x) -> 0 + no borrow
2529   if (N0 == N1)
2530     return CombineTo(N, DAG.getConstant(0, DL, VT),
2531                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2532 
2533   // fold (subc x, 0) -> x + no borrow
2534   if (isNullConstant(N1))
2535     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2536 
2537   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2538   if (isAllOnesConstant(N0))
2539     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2540                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2541 
2542   return SDValue();
2543 }
2544 
2545 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2546   SDValue N0 = N->getOperand(0);
2547   SDValue N1 = N->getOperand(1);
2548   EVT VT = N0.getValueType();
2549   if (VT.isVector())
2550     return SDValue();
2551 
2552   EVT CarryVT = N->getValueType(1);
2553   SDLoc DL(N);
2554 
2555   // If the flag result is dead, turn this into an SUB.
2556   if (!N->hasAnyUseOfValue(1))
2557     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2558                      DAG.getUNDEF(CarryVT));
2559 
2560   // fold (usubo x, x) -> 0 + no borrow
2561   if (N0 == N1)
2562     return CombineTo(N, DAG.getConstant(0, DL, VT),
2563                      DAG.getConstant(0, DL, CarryVT));
2564 
2565   // fold (usubo x, 0) -> x + no borrow
2566   if (isNullConstant(N1))
2567     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2568 
2569   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2570   if (isAllOnesConstant(N0))
2571     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2572                      DAG.getConstant(0, DL, CarryVT));
2573 
2574   return SDValue();
2575 }
2576 
2577 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2578   SDValue N0 = N->getOperand(0);
2579   SDValue N1 = N->getOperand(1);
2580   SDValue CarryIn = N->getOperand(2);
2581 
2582   // fold (sube x, y, false) -> (subc x, y)
2583   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2584     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2585 
2586   return SDValue();
2587 }
2588 
2589 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2590   SDValue N0 = N->getOperand(0);
2591   SDValue N1 = N->getOperand(1);
2592   SDValue CarryIn = N->getOperand(2);
2593 
2594   // fold (subcarry x, y, false) -> (usubo x, y)
2595   if (isNullConstant(CarryIn))
2596     return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2597 
2598   return SDValue();
2599 }
2600 
// Combine a MUL node: constant folding, strength reduction to shifts, and
// reassociation/canonicalization.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Constant-operand classification. For vectors a splat constant is
  // required; for scalars any ConstantSDNode qualifies, and opaque constants
  // (not to be folded) are tracked separately.
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The shift amount may need a different type than the multiply operand.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // Only proceed if c2 << c1 actually folded to a constant.
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}
2740 
2741 /// Return true if divmod libcall is available.
2742 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2743                                      const TargetLowering &TLI) {
2744   RTLIB::Libcall LC;
2745   EVT NodeType = Node->getValueType(0);
2746   if (!NodeType.isSimple())
2747     return false;
2748   switch (NodeType.getSimpleVT().SimpleTy) {
2749   default: return false; // No libcall for vector types.
2750   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2751   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2752   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2753   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2754   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2755   }
2756 
2757   return TLI.getLibcallName(LC) != nullptr;
2758 }
2759 
/// Issue divrem if both quotient and remainder are needed.
///
/// Given a SDIV/UDIV/SREM/UREM node, look for sibling users of the same
/// operands that compute the complementary result, and replace them all with
/// a single (S|U)DIVREM node. Returns the DIVREM value on success, or a null
/// SDValue when the combine does not apply.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Scan all users of the dividend for matching div/rem/divrem nodes on the
  // same (Op0, Op1) pair, and fold them all into one DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // First match: create the merged DIVREM node.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // A DIVREM already exists; reuse it.
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Quotient consumers take result 0, remainder consumers take result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2829 
2830 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2831   SDValue N0 = N->getOperand(0);
2832   SDValue N1 = N->getOperand(1);
2833   EVT VT = N->getValueType(0);
2834   SDLoc DL(N);
2835 
2836   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2837     return DAG.getUNDEF(VT);
2838 
2839   // undef / X -> 0
2840   // undef % X -> 0
2841   if (N0.isUndef())
2842     return DAG.getConstant(0, DL, VT);
2843 
2844   return SDValue();
2845 }
2846 
// Combine an SDIV node: constant folding, strength reduction (udiv, shifts
// for power-of-2 divisors), and divrem merging.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
                                    (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // This rounds the quotient toward zero for negative dividends, matching
    // sdiv semantics (plain SRA would round toward negative infinity).
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
2940 
// Combine a UDIV node: constant folding, strength reduction to shifts for
// power-of-2 divisors, and divrem merging.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The shift amount may need a different type than the divide operand.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3011 
// Handles both ISD::SREM and ISD::UREM: constant folding, strength reduction
// (urem-by-pow2 to AND), rewriting X%C as X-X/C*C, and divrem merging.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    // Only rewrite when the speculative DIV actually simplified; otherwise
    // we would just be re-expressing the REM with no gain.
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
3087 
3088 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3089   SDValue N0 = N->getOperand(0);
3090   SDValue N1 = N->getOperand(1);
3091   EVT VT = N->getValueType(0);
3092   SDLoc DL(N);
3093 
3094   if (VT.isVector()) {
3095     // fold (mulhs x, 0) -> 0
3096     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3097       return N1;
3098     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3099       return N0;
3100   }
3101 
3102   // fold (mulhs x, 0) -> 0
3103   if (isNullConstant(N1))
3104     return N1;
3105   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3106   if (isOneConstant(N1))
3107     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3108                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3109                                        getShiftAmountTy(N0.getValueType())));
3110 
3111   // fold (mulhs x, undef) -> 0
3112   if (N0.isUndef() || N1.isUndef())
3113     return DAG.getConstant(0, DL, VT);
3114 
3115   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3116   // plus a shift.
3117   if (VT.isSimple() && !VT.isVector()) {
3118     MVT Simple = VT.getSimpleVT();
3119     unsigned SimpleSize = Simple.getSizeInBits();
3120     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3121     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3122       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3123       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3124       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3125       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3126             DAG.getConstant(SimpleSize, DL,
3127                             getShiftAmountTy(N1.getValueType())));
3128       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3129     }
3130   }
3131 
3132   return SDValue();
3133 }
3134 
3135 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3136   SDValue N0 = N->getOperand(0);
3137   SDValue N1 = N->getOperand(1);
3138   EVT VT = N->getValueType(0);
3139   SDLoc DL(N);
3140 
3141   if (VT.isVector()) {
3142     // fold (mulhu x, 0) -> 0
3143     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3144       return N1;
3145     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3146       return N0;
3147   }
3148 
3149   // fold (mulhu x, 0) -> 0
3150   if (isNullConstant(N1))
3151     return N1;
3152   // fold (mulhu x, 1) -> 0
3153   if (isOneConstant(N1))
3154     return DAG.getConstant(0, DL, N0.getValueType());
3155   // fold (mulhu x, undef) -> 0
3156   if (N0.isUndef() || N1.isUndef())
3157     return DAG.getConstant(0, DL, VT);
3158 
3159   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3160   // plus a shift.
3161   if (VT.isSimple() && !VT.isVector()) {
3162     MVT Simple = VT.getSimpleVT();
3163     unsigned SimpleSize = Simple.getSizeInBits();
3164     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3165     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3166       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3167       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3168       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3169       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3170             DAG.getConstant(SimpleSize, DL,
3171                             getShiftAmountTy(N1.getValueType())));
3172       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3173     }
3174   }
3175 
3176   return SDValue();
3177 }
3178 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // Replace both results with the single-result node; feeding Res into the
    // dead high half is harmless since that value has no users.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): the low-half path above accepts Custom lowering
  // (isOperationLegalOrCustom) while this path requires strictly Legal —
  // presumably intentional, but worth confirming.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    // Speculatively combine the single-result form; only commit if it folded
    // to a different node that is (post-legalization) legal.
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative-combine dance for the high half.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3229 
3230 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3231   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3232     return Res;
3233 
3234   EVT VT = N->getValueType(0);
3235   SDLoc DL(N);
3236 
3237   // If the type is twice as wide is legal, transform the mulhu to a wider
3238   // multiply plus a shift.
3239   if (VT.isSimple() && !VT.isVector()) {
3240     MVT Simple = VT.getSimpleVT();
3241     unsigned SimpleSize = Simple.getSizeInBits();
3242     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3243     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3244       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3245       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3246       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3247       // Compute the high part as N1.
3248       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3249             DAG.getConstant(SimpleSize, DL,
3250                             getShiftAmountTy(Lo.getValueType())));
3251       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3252       // Compute the low part as N0.
3253       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3254       return CombineTo(N, Lo, Hi);
3255     }
3256   }
3257 
3258   return SDValue();
3259 }
3260 
3261 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3262   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3263     return Res;
3264 
3265   EVT VT = N->getValueType(0);
3266   SDLoc DL(N);
3267 
3268   // If the type is twice as wide is legal, transform the mulhu to a wider
3269   // multiply plus a shift.
3270   if (VT.isSimple() && !VT.isVector()) {
3271     MVT Simple = VT.getSimpleVT();
3272     unsigned SimpleSize = Simple.getSizeInBits();
3273     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3274     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3275       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3276       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3277       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3278       // Compute the high part as N1.
3279       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3280             DAG.getConstant(SimpleSize, DL,
3281                             getShiftAmountTy(Lo.getValueType())));
3282       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3283       // Compute the low part as N0.
3284       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3285       return CombineTo(N, Lo, Hi);
3286     }
3287   }
3288 
3289   return SDValue();
3290 }
3291 
3292 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3293   // (smulo x, 2) -> (saddo x, x)
3294   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3295     if (C2->getAPIntValue() == 2)
3296       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3297                          N->getOperand(0), N->getOperand(0));
3298 
3299   return SDValue();
3300 }
3301 
3302 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3303   // (umulo x, 2) -> (uaddo x, x)
3304   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3305     if (C2->getAPIntValue() == 2)
3306       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3307                          N->getOperand(0), N->getOperand(0));
3308 
3309   return SDValue();
3310 }
3311 
3312 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3313   SDValue N0 = N->getOperand(0);
3314   SDValue N1 = N->getOperand(1);
3315   EVT VT = N0.getValueType();
3316 
3317   // fold vector ops
3318   if (VT.isVector())
3319     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3320       return FoldedVOp;
3321 
3322   // fold operation with constant operands.
3323   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3324   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3325   if (N0C && N1C)
3326     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3327 
3328   // canonicalize constant to RHS
3329   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3330      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3331     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3332 
3333   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3334   // Only do this if the current op isn't legal and the flipped is.
3335   unsigned Opcode = N->getOpcode();
3336   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3337   if (!TLI.isOperationLegal(Opcode, VT) &&
3338       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3339       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3340     unsigned AltOpcode;
3341     switch (Opcode) {
3342     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3343     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3344     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3345     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3346     default: llvm_unreachable("Unknown MINMAX opcode");
3347     }
3348     if (TLI.isOperationLegal(AltOpcode, VT))
3349       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3350   }
3351 
3352   return SDValue();
3353 }
3354 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  // Op0VT is the pre-extend/truncate type; both hands must share it for the
  // sunk operation to be well-typed.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  // The shared operand z must be identical on both sides.
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    // N1 has the same opcode as N0 (asserted above), so its operand is the
    // matching pre-cast value.
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    // The top-of-function assert guarantees N1 is also a VECTOR_SHUFFLE here.
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // (For XOR the second shuffle operand must become zero, since
      // x ^ x == 0 would otherwise be assumed for the shared operand.)
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
3507 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares test against the same RHS with the same predicate: fold
  // pairs of zero/sign-bit tests into a single test of (or X, Y) or
  // (and X, Y).
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The add wraps 0 -> 1 and -1 -> 0, so both excluded values land below 2.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3627 
3628 /// This contains all DAGCombine rules which reduce two values combined by
3629 /// an And operation to a single value. This makes them reusable in the context
3630 /// of visitSELECT(). Rules involving constants are not included as
3631 /// visitSELECT() already handles those cases.
3632 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3633   EVT VT = N1.getValueType();
3634   SDLoc DL(N);
3635 
3636   // fold (and x, undef) -> 0
3637   if (N0.isUndef() || N1.isUndef())
3638     return DAG.getConstant(0, DL, VT);
3639 
3640   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3641     return V;
3642 
3643   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3644       VT.getSizeInBits() <= 64) {
3645     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3646       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3647         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3648         // immediate for an add, but it is legal if its top c2 bits are set,
3649         // transform the ADD so the immediate doesn't need to be materialized
3650         // in a register.
3651         APInt ADDC = ADDI->getAPIntValue();
3652         APInt SRLC = SRLI->getAPIntValue();
3653         if (ADDC.getMinSignedBits() <= 64 &&
3654             SRLC.ult(VT.getSizeInBits()) &&
3655             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3656           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3657                                              SRLC.getZExtValue());
3658           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3659             ADDC |= Mask;
3660             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3661               SDLoc DL0(N0);
3662               SDValue NewAdd =
3663                 DAG.getNode(ISD::ADD, DL0, VT,
3664                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3665               CombineTo(N0.getNode(), NewAdd);
3666               // Return N so it doesn't get rechecked!
3667               return SDValue(N, 0);
3668             }
3669           }
3670         }
3671       }
3672     }
3673   }
3674 
3675   // Reduce bit extract of low half of an integer to the narrower type.
3676   // (and (srl i64:x, K), KMask) ->
3677   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3678   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3679     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3680       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3681         unsigned Size = VT.getSizeInBits();
3682         const APInt &AndMask = CAnd->getAPIntValue();
3683         unsigned ShiftBits = CShift->getZExtValue();
3684 
3685         // Bail out, this node will probably disappear anyway.
3686         if (ShiftBits == 0)
3687           return SDValue();
3688 
3689         unsigned MaskBits = AndMask.countTrailingOnes();
3690         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3691 
3692         if (AndMask.isMask() &&
3693             // Required bits must not span the two halves of the integer and
3694             // must fit in the half size type.
3695             (ShiftBits + MaskBits <= Size / 2) &&
3696             TLI.isNarrowingProfitable(VT, HalfVT) &&
3697             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3698             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3699             TLI.isTruncateFree(VT, HalfVT) &&
3700             TLI.isZExtFree(HalfVT, VT)) {
3701           // The isNarrowingProfitable is to avoid regressions on PPC and
3702           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3703           // on downstream users of this. Those patterns could probably be
3704           // extended to handle extensions mixed in.
3705 
3706           SDValue SL(N0);
3707           assert(MaskBits <= Size);
3708 
3709           // Extracting the highest bit of the low half.
3710           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3711           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3712                                       N0.getOperand(0));
3713 
3714           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3715           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3716           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3717           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3718           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3719         }
3720       }
3721     }
3722   }
3723 
3724   return SDValue();
3725 }
3726 
3727 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3728                                    EVT LoadResultTy, EVT &ExtVT) {
3729   if (!AndC->getAPIntValue().isMask())
3730     return false;
3731 
3732   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3733 
3734   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3735   EVT LoadedVT = LoadN->getMemoryVT();
3736 
3737   if (ExtVT == LoadedVT &&
3738       (!LegalOperations ||
3739        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3740     // ZEXTLOAD will match without needing to change the size of the value being
3741     // loaded.
3742     return true;
3743   }
3744 
3745   // Do not change the width of a volatile load.
3746   if (LoadN->isVolatile())
3747     return false;
3748 
3749   // Do not generate loads of non-round integer types since these can
3750   // be expensive (and would be wrong if the type is not byte sized).
3751   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3752     return false;
3753 
3754   if (LegalOperations &&
3755       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3756     return false;
3757 
3758   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3759     return false;
3760 
3761   return true;
3762 }
3763 
3764 bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
3765                                     EVT &ExtVT, unsigned ShAmt) {
3766   // Don't transform one with multiple uses, this would require adding a new
3767   // load.
3768   if (!SDValue(LoadN, 0).hasOneUse())
3769     return false;
3770 
3771   if (LegalOperations &&
3772       !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
3773     return false;
3774 
3775   // Do not generate loads of non-round integer types since these can
3776   // be expensive (and would be wrong if the type is not byte sized).
3777   if (!ExtVT.isRound())
3778     return false;
3779 
3780   // Don't change the width of a volatile load.
3781   if (LoadN->isVolatile())
3782     return false;
3783 
3784   // Verify that we are actually reducing a load width here.
3785   if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
3786     return false;
3787 
3788   // For the transform to be legal, the load must produce only two values
3789   // (the value loaded and the chain).  Don't transform a pre-increment
3790   // load, for example, which produces an extra value.  Otherwise the
3791   // transformation is not equivalent, and the downstream logic to replace
3792   // uses gets things wrong.
3793   if (LoadN->getNumValues() > 2)
3794     return false;
3795 
3796   // If the load that we're shrinking is an extload and we're not just
3797   // discarding the extension we can't simply shrink the load. Bail.
3798   // TODO: It would be possible to merge the extensions in some cases.
3799   if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
3800       LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
3801     return false;
3802 
3803   if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
3804     return false;
3805 
3806   // It's not possible to generate a constant of extended or untyped type.
3807   EVT PtrType = LoadN->getOperand(1).getValueType();
3808   if (PtrType == MVT::Untyped || PtrType.isExtended())
3809     return false;
3810 
3811   return true;
3812 }
3813 
/// Recursively walk the operand tree below an AND, collecting loads that can
/// be narrowed by \p Mask into \p Loads.  OR/XOR nodes whose constant operand
/// has bits outside the mask are recorded in \p NodesWithConsts so the
/// constant can be re-masked later.  At most one other opaque leaf is
/// tolerated; it is returned via \p NodeToMask and will have the mask applied
/// to it explicitly.  Returns false if anything in the tree makes the
/// transform unsafe.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    // This transform only handles scalar values.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // For OR/XOR, constant bits outside the mask would change the result,
      // so remember the node for later re-masking of its constant.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Interior nodes with other users cannot be rewritten in place.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.insert(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the known-zero boundary comes from its VT operand;
      // for ZERO_EXTEND it is the width of the value being extended.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Bitwise logic: recurse into the operands.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;
    NodeToMask = Op.getNode();
  }
  return true;
}
3888 
3889 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
3890   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
3891   if (!Mask)
3892     return false;
3893 
3894   if (!Mask->getAPIntValue().isMask())
3895     return false;
3896 
3897   // No need to do anything if the and directly uses a load.
3898   if (isa<LoadSDNode>(N->getOperand(0)))
3899     return false;
3900 
3901   SmallPtrSet<LoadSDNode*, 8> Loads;
3902   SmallPtrSet<SDNode*, 2> NodesWithConsts;
3903   SDNode *FixupNode = nullptr;
3904   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
3905     if (Loads.size() == 0)
3906       return false;
3907 
3908     DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
3909     SDValue MaskOp = N->getOperand(1);
3910 
3911     // If it exists, fixup the single node we allow in the tree that needs
3912     // masking.
3913     if (FixupNode) {
3914       DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
3915       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
3916                                 FixupNode->getValueType(0),
3917                                 SDValue(FixupNode, 0), MaskOp);
3918       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
3919       DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
3920                              MaskOp);
3921     }
3922 
3923     // Narrow any constants that need it.
3924     for (auto *LogicN : NodesWithConsts) {
3925       SDValue Op0 = LogicN->getOperand(0);
3926       SDValue Op1 = LogicN->getOperand(1);
3927 
3928       if (isa<ConstantSDNode>(Op0))
3929           std::swap(Op0, Op1);
3930 
3931       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
3932                                 Op1, MaskOp);
3933 
3934       DAG.UpdateNodeOperands(LogicN, Op0, And);
3935     }
3936 
3937     // Create narrow loads.
3938     for (auto *Load : Loads) {
3939       DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
3940       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
3941                                 SDValue(Load, 0), MaskOp);
3942       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
3943       DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
3944       SDValue NewLoad = ReduceLoadWidth(And.getNode());
3945       assert(NewLoad &&
3946              "Shouldn't be masking the load if it can't be narrowed");
3947       CombineTo(Load, NewLoad, NewLoad.getValue(1));
3948     }
3949     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
3950     return true;
3951   }
3952   return false;
3953 }
3954 
/// Combine an (and N0, N1) node.  Tries, in order: trivial identities,
/// vector-splat folds, constant folding and canonicalization, reassociation,
/// extension/load narrowing (converting extloads to zextloads or shrinking
/// load widths), backwards mask propagation, demanded-bits simplification,
/// and bswap pattern matching.  Returns the replacement value, or an empty
/// SDValue if no combine applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // Whether the AND is redundant given the load's extension type.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);

      AddToWorklist(N);
      CombineTo(LN0, Res, Res.getValue(1));
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  return SDValue();
}
4254 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// \p N0 and \p N1 are the two sides of the OR; \p DemandHighBits is false
/// when the caller has already masked the result to the low 16 bits, so the
/// high bits need not be proven zero.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so that an AND-of-SHL (if any) ends up in N0 and an
  // AND-of-SRL (if any) in N1.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // The SHL side must be masked with 0xFF00.
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    // The SRL side must be masked with 0xFF.
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // Canonicalize so N0 is the SHL and N1 is the SRL.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shifts must be by exactly 8 bits.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both sides must ultimately come from the same value 'a'.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a), then shift the swapped halfword down into the low bits
  // for types wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4361 
/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success, records the source node 'x' in Parts[i], where i is the byte
/// index selected by the mask (0 for 0xFF, 1 for 0xFF00, ...).  Fails if
/// that Parts slot is already occupied.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  // Each element is either (and (shift x)) or (shift (and x)).
  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  // Map the mask constant to the byte position it selects.
  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       MaskByteOffset = 0; break;
  case 0xFF00:     MaskByteOffset = 1; break;
  case 0xFF0000:   MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each byte of the packed pattern may only be produced once.
  if (Parts[MaskByteOffset])
    return false;

  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}
4443 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
/// \p N0 and \p N1 are the two operands of the outermost OR.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] receives the source node for byte i; all four must match.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
4518 
4519 /// This contains all DAGCombine rules which reduce two values combined by
4520 /// an Or operation to a single value \see visitANDLike().
4521 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4522   EVT VT = N1.getValueType();
4523   SDLoc DL(N);
4524 
4525   // fold (or x, undef) -> -1
4526   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4527     return DAG.getAllOnesConstant(DL, VT);
4528 
4529   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4530     return V;
4531 
4532   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4533   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4534       // Don't increase # computations.
4535       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4536     // We can only do this xform if we know that bits from X that are set in C2
4537     // but not in C1 are already zero.  Likewise for Y.
4538     if (const ConstantSDNode *N0O1C =
4539         getAsNonOpaqueConstant(N0.getOperand(1))) {
4540       if (const ConstantSDNode *N1O1C =
4541           getAsNonOpaqueConstant(N1.getOperand(1))) {
4542         // We can only do this xform if we know that bits from X that are set in
4543         // C2 but not in C1 are already zero.  Likewise for Y.
4544         const APInt &LHSMask = N0O1C->getAPIntValue();
4545         const APInt &RHSMask = N1O1C->getAPIntValue();
4546 
4547         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4548             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4549           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4550                                   N0.getOperand(0), N1.getOperand(0));
4551           return DAG.getNode(ISD::AND, DL, VT, X,
4552                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4553         }
4554       }
4555     }
4556   }
4557 
4558   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4559   if (N0.getOpcode() == ISD::AND &&
4560       N1.getOpcode() == ISD::AND &&
4561       N0.getOperand(0) == N1.getOperand(0) &&
4562       // Don't increase # computations.
4563       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4564     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4565                             N0.getOperand(1), N1.getOperand(1));
4566     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4567   }
4568 
4569   return SDValue();
4570 }
4571 
/// Visit an ISD::OR node and return a simplified replacement value, or an
/// empty SDValue if no fold applies.  Folds are attempted in order from the
/// cheapest/most general to the most pattern-specific.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build a combined mask element-by-element; each result element must
        // come from exactly one of the two shuffles (the other must be zero).
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          // If the direct mask is not legal, try the commuted form before
          // giving up.
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  // Try to push the OR into the arms of a select (see foldBinOpIntoSelect).
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // OR-specific folds shared with other combiners live in visitORLike.
  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Try to combine narrow loads joined by this OR into a single wide load.
  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
4737 
4738 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4739 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4740   if (Op.getOpcode() == ISD::AND) {
4741     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4742       Mask = Op.getOperand(1);
4743       Op = Op.getOperand(0);
4744     } else {
4745       return false;
4746     }
4747   }
4748 
4749   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4750     Shift = Op;
4751     return true;
4752   }
4753 
4754   return false;
4755 }
4756 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  // Strip a redundant (and Neg', EltSize - 1): per [A] only the low
  // Log2(EltSize) bits of Neg matter when EltSize is a power of 2.
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  // Without a mask we require the exact equality of condition [B].
  return Width == EltSize;
}
4859 
4860 // A subroutine of MatchRotate used once we have found an OR of two opposite
4861 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
4862 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4863 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
4864 // Neg with outer conversions stripped away.
4865 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4866                                        SDValue Neg, SDValue InnerPos,
4867                                        SDValue InnerNeg, unsigned PosOpcode,
4868                                        unsigned NegOpcode, const SDLoc &DL) {
4869   // fold (or (shl x, (*ext y)),
4870   //          (srl x, (*ext (sub 32, y)))) ->
4871   //   (rotl x, y) or (rotr x, (sub 32, y))
4872   //
4873   // fold (or (shl x, (*ext (sub 32, y))),
4874   //          (srl x, (*ext y))) ->
4875   //   (rotr x, y) or (rotl x, (sub 32, y))
4876   EVT VT = Shifted.getValueType();
4877   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4878     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4879     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4880                        HasPos ? Pos : Neg).getNode();
4881   }
4882 
4883   return nullptr;
4884 }
4885 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate: match the rotate on the wider pre-truncate
  // type, then truncate the result.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // The constant shift amounts must sum to the element size for this to be a
  // rotate.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        // Combine LHSMask with the bits contributed by the srl half.
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        // Combine RHSMask with the bits contributed by the shl half.
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try the variable-amount rotate patterns in both orientations.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
5003 
5004 namespace {
5005 
5006 /// Represents known origin of an individual byte in load combine pattern. The
5007 /// value of the byte is either constant zero or comes from memory.
5008 struct ByteProvider {
5009   // For constant zero providers Load is set to nullptr. For memory providers
5010   // Load represents the node which loads the byte from memory.
5011   // ByteOffset is the offset of the byte in the value produced by the load.
5012   LoadSDNode *Load = nullptr;
5013   unsigned ByteOffset = 0;
5014 
5015   ByteProvider() = default;
5016 
5017   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5018     return ByteProvider(Load, ByteOffset);
5019   }
5020 
5021   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5022 
5023   bool isConstantZero() const { return !Load; }
5024   bool isMemory() const { return Load; }
5025 
5026   bool operator==(const ByteProvider &Other) const {
5027     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5028   }
5029 
5030 private:
5031   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5032       : Load(Load), ByteOffset(ByteOffset) {}
5033 };
5034 
5035 } // end anonymous namespace
5036 
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR has a known provider only if the corresponding byte of
    // at least one operand is known to be constant zero.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    // A left shift by a whole number of bytes moves providers up: bytes below
    // the shift amount become constant zero.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    // Bytes beyond the narrow source width are zero only for ZERO_EXTEND;
    // for the other extensions they are unknown.
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // BSWAP reverses byte order, so look up the mirrored byte index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes past the load's memory width are zero only for ZEXTLOAD.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}
5134 
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    return SDValue();

  // Map a result-byte index to its position in memory for each endianness.
  std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
    unsigned BW, unsigned i) { return i; };
  std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
    unsigned BW, unsigned i) { return BW - i - 1; };

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  // State accumulated while walking every byte of the OR pattern.
  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
  for (unsigned i = 0; i < ByteWidth; i++) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory
      return SDValue();

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    // Record each distinct load so its chain users can be rewired below.
    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < ByteWidth; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
    BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
    if (!BigEndian && !LittleEndian)
      return SDValue();
  }
  assert((BigEndian != LittleEndian) && "should be either or");
  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single load and bswap if needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != BigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, FirstLoad->getAddressSpace(),
                                        FirstLoad->getAlignment(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
5300 
/// Combine an ISD::XOR node. Tries a fixed sequence of folds; the first one
/// that matches produces the replacement value. Returns an empty SDValue if
/// no fold applies. NOTE: fold order matters — earlier folds shadow later
/// ones, so do not reorder them.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // An xor against the "true" value of a setcc (or setcc-equivalent
  // SELECT_CC) is a logical NOT, which we absorb by inverting the condition
  // code instead.
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only invert if the inverse condition is legal (or we are still
    // pre-legalization and may freely introduce it).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  // Pushes the xor-by-1 inside the zext so it operates on the narrow
  // setcc result type.
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // De Morgan for i1: distributing the NOT may expose further setcc folds.
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // Same De Morgan transform for a full-width NOT (xor with all-ones); the
  // constant operand will then constant-fold with the xor.
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  // This is the classic branchless abs idiom; recognize it when the target
  // supports ISD::ABS.
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
      if (C->getAPIntValue() == (OpSizeInBits - 1))
        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
5462 
5463 /// Handle transforms common to the three shifts, when the shift amount is a
5464 /// constant.
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// Attempts to commute a shift with a binop below it:
///   (shift (binop X, C1), C2) -> (binop (shift X, C2), (shift C1, C2))
/// \param N   the SHL/SRA/SRL node with a constant shift amount.
/// \param Amt the constant shift amount (currently unused directly; the
///            amount is re-read from N's operand 1).
/// \returns the rewritten binop, or an empty SDValue if the transform does
///          not apply.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  // Only profitable when the binop has no other users; otherwise we would
  // just add nodes.
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  // NOTE(review): this bails when the shift node itself has exactly one use;
  // a bailout on *multiple* uses would seem more natural here — confirm the
  // intended profitability heuristic against upstream history.
  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto (e.g. if the shift feeds addressing).
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  // Both operands are constants, so this must constant-fold.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
5540 
5541 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5542   assert(N->getOpcode() == ISD::TRUNCATE);
5543   assert(N->getOperand(0).getOpcode() == ISD::AND);
5544 
5545   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5546   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5547     SDValue N01 = N->getOperand(0).getOperand(1);
5548     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5549       SDLoc DL(N);
5550       EVT TruncVT = N->getValueType(0);
5551       SDValue N00 = N->getOperand(0).getOperand(0);
5552       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5553       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5554       AddToWorklist(Trunc00.getNode());
5555       AddToWorklist(Trunc01.getNode());
5556       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5557     }
5558   }
5559 
5560   return SDValue();
5561 }
5562 
/// Combine an ISD::ROTL / ISD::ROTR node. Normalizes constant rotate
/// amounts modulo the bit width and merges chained rotates. Returns an
/// empty SDValue if no fold applies.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullConstantOrNullSplatConstant(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  // Unlike shifts, an out-of-range rotate amount wraps rather than being
  // undefined, so reduce it into range.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  // Same-direction rotates add their amounts; opposite directions subtract.
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        // Normalize the combined amount into [0, Bitsize).
        // NOTE(review): SREM is a signed remainder, so an opposite-direction
        // combine (SUB) with c2 > c1 can leave a negative rotate amount here
        // — confirm downstream handling of negative amounts.
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
5612 
/// Combine an ISD::SHL node. Tries a fixed sequence of folds — constant
/// folding, degenerate operands, shift merging, and distributing the shift
/// over binops — returning the first replacement that matches, or an empty
/// SDValue. NOTE: fold order matters; do not reorder.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    // Valid because shifting an all-ones/all-zeros lane commutes with the
    // AND mask shift.
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // Zero-extend both amounts by one bit so the sum can't wrap before the
    // range check.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        // Combined amount >= bitwidth: all bits are shifted out.
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Shift amounts match: the shl just clears the low bits the srl
        // vacated, which can be done in the narrow type.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // The 'exact' flag guarantees no bits were shifted out by the right shift,
  // so the two shifts can be merged into one.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask tracks which bits of x survive the srl; then it is shifted
        // along with the residual shift direction.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  // The sra/shl pair only clears the low c1 bits, which an AND mask does
  // directly.
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  // Only commit if the shifted constant actually folded to a constant.
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  // Last resort: try the generic shift-through-binop transform.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
5850 
5851 SDValue DAGCombiner::visitSRA(SDNode *N) {
5852   SDValue N0 = N->getOperand(0);
5853   SDValue N1 = N->getOperand(1);
5854   EVT VT = N0.getValueType();
5855   unsigned OpSizeInBits = VT.getScalarSizeInBits();
5856 
5857   // Arithmetic shifting an all-sign-bit value is a no-op.
5858   // fold (sra 0, x) -> 0
5859   // fold (sra -1, x) -> -1
5860   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
5861     return N0;
5862 
5863   // fold vector ops
5864   if (VT.isVector())
5865     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5866       return FoldedVOp;
5867 
5868   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5869 
5870   // fold (sra c1, c2) -> (sra c1, c2)
5871   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5872   if (N0C && N1C && !N1C->isOpaque())
5873     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
5874   // fold (sra x, c >= size(x)) -> undef
5875   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
5876   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
5877     return Val->getAPIntValue().uge(OpSizeInBits);
5878   };
5879   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
5880     return DAG.getUNDEF(VT);
5881   // fold (sra x, 0) -> x
5882   if (N1C && N1C->isNullValue())
5883     return N0;
5884 
5885   if (SDValue NewSel = foldBinOpIntoSelect(N))
5886     return NewSel;
5887 
5888   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
5889   // sext_inreg.
5890   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
5891     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
5892     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
5893     if (VT.isVector())
5894       ExtVT = EVT::getVectorVT(*DAG.getContext(),
5895                                ExtVT, VT.getVectorNumElements());
5896     if ((!LegalOperations ||
5897          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
5898       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5899                          N0.getOperand(0), DAG.getValueType(ExtVT));
5900   }
5901 
5902   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
5903   if (N0.getOpcode() == ISD::SRA) {
5904     SDLoc DL(N);
5905     EVT ShiftVT = N1.getValueType();
5906 
5907     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
5908                                           ConstantSDNode *RHS) {
5909       APInt c1 = LHS->getAPIntValue();
5910       APInt c2 = RHS->getAPIntValue();
5911       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5912       return (c1 + c2).uge(OpSizeInBits);
5913     };
5914     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
5915       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
5916                          DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
5917 
5918     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
5919                                        ConstantSDNode *RHS) {
5920       APInt c1 = LHS->getAPIntValue();
5921       APInt c2 = RHS->getAPIntValue();
5922       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5923       return (c1 + c2).ult(OpSizeInBits);
5924     };
5925     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
5926       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
5927       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
5928     }
5929   }
5930 
5931   // fold (sra (shl X, m), (sub result_size, n))
5932   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
5933   // result_size - n != m.
5934   // If truncate is free for the target sext(shl) is likely to result in better
5935   // code.
5936   if (N0.getOpcode() == ISD::SHL && N1C) {
5937     // Get the two constanst of the shifts, CN0 = m, CN = n.
5938     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
5939     if (N01C) {
5940       LLVMContext &Ctx = *DAG.getContext();
5941       // Determine what the truncate's result bitsize and type would be.
5942       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
5943 
5944       if (VT.isVector())
5945         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
5946 
5947       // Determine the residual right-shift amount.
5948       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
5949 
5950       // If the shift is not a no-op (in which case this should be just a sign
5951       // extend already), the truncated to type is legal, sign_extend is legal
5952       // on that type, and the truncate to that type is both legal and free,
5953       // perform the transform.
5954       if ((ShiftAmt > 0) &&
5955           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
5956           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
5957           TLI.isTruncateFree(VT, TruncVT)) {
5958         SDLoc DL(N);
5959         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
5960             getShiftAmountTy(N0.getOperand(0).getValueType()));
5961         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
5962                                     N0.getOperand(0), Amt);
5963         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
5964                                     Shift);
5965         return DAG.getNode(ISD::SIGN_EXTEND, DL,
5966                            N->getValueType(0), Trunc);
5967       }
5968     }
5969   }
5970 
5971   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
5972   if (N1.getOpcode() == ISD::TRUNCATE &&
5973       N1.getOperand(0).getOpcode() == ISD::AND) {
5974     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5975       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
5976   }
5977 
5978   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
5979   //      if c1 is equal to the number of bits the trunc removes
5980   if (N0.getOpcode() == ISD::TRUNCATE &&
5981       (N0.getOperand(0).getOpcode() == ISD::SRL ||
5982        N0.getOperand(0).getOpcode() == ISD::SRA) &&
5983       N0.getOperand(0).hasOneUse() &&
5984       N0.getOperand(0).getOperand(1).hasOneUse() &&
5985       N1C) {
5986     SDValue N0Op0 = N0.getOperand(0);
5987     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
5988       unsigned LargeShiftVal = LargeShift->getZExtValue();
5989       EVT LargeVT = N0Op0.getValueType();
5990 
5991       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
5992         SDLoc DL(N);
5993         SDValue Amt =
5994           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
5995                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
5996         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
5997                                   N0Op0.getOperand(0), Amt);
5998         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
5999       }
6000     }
6001   }
6002 
6003   // Simplify, based on bits shifted out of the LHS.
6004   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6005     return SDValue(N, 0);
6006 
6007   // If the sign bit is known to be zero, switch this to a SRL.
6008   if (DAG.SignBitIsZero(N0))
6009     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
6010 
6011   if (N1C && !N1C->isOpaque())
6012     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
6013       return NewSRA;
6014 
6015   return SDValue();
6016 }
6017 
/// Combine patterns rooted at an ISD::SRL (logical shift right) node.
/// Returns the replacement value, SDValue(N, 0) if N was updated in place,
/// or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Shift amount as a scalar constant or a splat of one; null otherwise.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen by one bit so the addition below cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen by one bit so the addition below cannot wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift,
      // i.e. the inner shift discards exactly the bits the truncate removes,
      // so all bits of the result come from the wide shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // where cst2 is the all-ones value logically shifted right by c.
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits of the wide result that an srl in VT would have
      // zeroed but the anyext left undefined.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), log2(size(x))) -> x  iff x has one bit set (the low
  // bit): the shifted ctlz computes "x == 0".
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known;
    DAG.computeKnownBits(N0.getOperand(0), Known);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        // Move the possibly-set bit down to bit 0 before inverting it.
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
6237 
6238 SDValue DAGCombiner::visitABS(SDNode *N) {
6239   SDValue N0 = N->getOperand(0);
6240   EVT VT = N->getValueType(0);
6241 
6242   // fold (abs c1) -> c2
6243   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6244     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6245   // fold (abs (abs x)) -> (abs x)
6246   if (N0.getOpcode() == ISD::ABS)
6247     return N0;
6248   // fold (abs x) -> x iff not-negative
6249   if (DAG.SignBitIsZero(N0))
6250     return N0;
6251   return SDValue();
6252 }
6253 
6254 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6255   SDValue N0 = N->getOperand(0);
6256   EVT VT = N->getValueType(0);
6257 
6258   // fold (bswap c1) -> c2
6259   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6260     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6261   // fold (bswap (bswap x)) -> x
6262   if (N0.getOpcode() == ISD::BSWAP)
6263     return N0->getOperand(0);
6264   return SDValue();
6265 }
6266 
6267 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6268   SDValue N0 = N->getOperand(0);
6269   EVT VT = N->getValueType(0);
6270 
6271   // fold (bitreverse c1) -> c2
6272   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6273     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6274   // fold (bitreverse (bitreverse x)) -> x
6275   if (N0.getOpcode() == ISD::BITREVERSE)
6276     return N0.getOperand(0);
6277   return SDValue();
6278 }
6279 
6280 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6281   SDValue N0 = N->getOperand(0);
6282   EVT VT = N->getValueType(0);
6283 
6284   // fold (ctlz c1) -> c2
6285   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6286     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6287 
6288   // If the value is known never to be zero, switch to the undef version.
6289   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
6290     if (DAG.isKnownNeverZero(N0))
6291       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6292   }
6293 
6294   return SDValue();
6295 }
6296 
6297 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6298   SDValue N0 = N->getOperand(0);
6299   EVT VT = N->getValueType(0);
6300 
6301   // fold (ctlz_zero_undef c1) -> c2
6302   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6303     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6304   return SDValue();
6305 }
6306 
6307 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6308   SDValue N0 = N->getOperand(0);
6309   EVT VT = N->getValueType(0);
6310 
6311   // fold (cttz c1) -> c2
6312   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6313     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6314 
6315   // If the value is known never to be zero, switch to the undef version.
6316   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
6317     if (DAG.isKnownNeverZero(N0))
6318       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6319   }
6320 
6321   return SDValue();
6322 }
6323 
6324 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6325   SDValue N0 = N->getOperand(0);
6326   EVT VT = N->getValueType(0);
6327 
6328   // fold (cttz_zero_undef c1) -> c2
6329   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6330     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6331   return SDValue();
6332 }
6333 
6334 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6335   SDValue N0 = N->getOperand(0);
6336   EVT VT = N->getValueType(0);
6337 
6338   // fold (ctpop c1) -> c2
6339   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6340     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6341   return SDValue();
6342 }
6343 
6344 /// \brief Generate Min/Max node
6345 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6346                                    SDValue RHS, SDValue True, SDValue False,
6347                                    ISD::CondCode CC, const TargetLowering &TLI,
6348                                    SelectionDAG &DAG) {
6349   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6350     return SDValue();
6351 
6352   switch (CC) {
6353   case ISD::SETOLT:
6354   case ISD::SETOLE:
6355   case ISD::SETLT:
6356   case ISD::SETLE:
6357   case ISD::SETULT:
6358   case ISD::SETULE: {
6359     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6360     if (TLI.isOperationLegal(Opcode, VT))
6361       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6362     return SDValue();
6363   }
6364   case ISD::SETOGT:
6365   case ISD::SETOGE:
6366   case ISD::SETGT:
6367   case ISD::SETGE:
6368   case ISD::SETUGT:
6369   case ISD::SETUGE: {
6370     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6371     if (TLI.isOperationLegal(Opcode, VT))
6372       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6373     return SDValue();
6374   }
6375   default:
6376     return SDValue();
6377   }
6378 }
6379 
/// Fold "select Cond, C1, C2" where both arms are integer constants into
/// logic/arithmetic on (an extension of) the condition, e.g. zext/sext/xor
/// of Cond, or an add when the constants differ by one.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  // These folds only make sense for integer-valued selects.
  if (!VT.isInteger())
    return SDValue();

  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        // (sext of i1 true is -1, so -1 + (C1+1) == C1.)
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(false, true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(false, false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
6473 
/// Combine patterns rooted at an ISD::SELECT node.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // The i1 logic folds below require the condition, true and false values to
  // all have the same (i1) type.
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // A non-empty use list means the inner select already existed in the
      // DAG, so this transform is free.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  // select (xor Cond, 1), X, Y -> select Cond, Y, X
  if (VT0 == MVT::i1) {
    if (N0->getOpcode() == ISD::XOR) {
      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
        SDValue Cond0 = N0->getOperand(0);
        // xor with 1 on an i1 condition is a logical not; swap the arms.
        if (C->isOne())
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // Prefer select_cc when the target can handle it; otherwise try the
    // generic select-of-setcc simplifications.
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
6632 
6633 static
6634 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6635   SDLoc DL(N);
6636   EVT LoVT, HiVT;
6637   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6638 
6639   // Split the inputs.
6640   SDValue Lo, Hi, LL, LH, RL, RH;
6641   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6642   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6643 
6644   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6645   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6646 
6647   return std::make_pair(Lo, Hi);
6648 }
6649 
6650 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6651 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6652 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6653   SDLoc DL(N);
6654   SDValue Cond = N->getOperand(0);
6655   SDValue LHS = N->getOperand(1);
6656   SDValue RHS = N->getOperand(2);
6657   EVT VT = N->getValueType(0);
6658   int NumElems = VT.getVectorNumElements();
6659   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6660          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6661          Cond.getOpcode() == ISD::BUILD_VECTOR);
6662 
6663   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6664   // binary ones here.
6665   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6666     return SDValue();
6667 
6668   // We're sure we have an even number of elements due to the
6669   // concat_vectors we have as arguments to vselect.
6670   // Skip BV elements until we find one that's not an UNDEF
6671   // After we find an UNDEF element, keep looping until we get to half the
6672   // length of the BV and see if all the non-undef nodes are the same.
6673   ConstantSDNode *BottomHalf = nullptr;
6674   for (int i = 0; i < NumElems / 2; ++i) {
6675     if (Cond->getOperand(i)->isUndef())
6676       continue;
6677 
6678     if (BottomHalf == nullptr)
6679       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6680     else if (Cond->getOperand(i).getNode() != BottomHalf)
6681       return SDValue();
6682   }
6683 
6684   // Do the same for the second half of the BuildVector
6685   ConstantSDNode *TopHalf = nullptr;
6686   for (int i = NumElems / 2; i < NumElems; ++i) {
6687     if (Cond->getOperand(i)->isUndef())
6688       continue;
6689 
6690     if (TopHalf == nullptr)
6691       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
6692     else if (Cond->getOperand(i).getNode() != TopHalf)
6693       return SDValue();
6694   }
6695 
6696   assert(TopHalf && BottomHalf &&
6697          "One half of the selector was all UNDEFs and the other was all the "
6698          "same value. This should have been addressed before this function.");
6699   return DAG.getNode(
6700       ISD::CONCAT_VECTORS, DL, VT,
6701       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
6702       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
6703 }
6704 
/// Split a masked scatter whose data type needs type-splitting, when the mask
/// comes from a SETCC, into two half-width scatters joined by a TokenFactor.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  // Splitting is only useful before type legalization.
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  // Split the SETCC itself rather than its result, so the legalizer never
  // sees the full-width compare.
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  // NOTE(review): a single MMO (sized from LoMemVT and using the original
  // pointer info) is shared by both half scatters — confirm this is
  // intentional for non-uniform splits.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  // Both halves scatter from the same base pointer; only data, mask and
  // indices are split.
  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Merge the two store chains.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
6765 
6766 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
6767   if (Level >= AfterLegalizeTypes)
6768     return SDValue();
6769 
6770   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
6771   SDValue Mask = MST->getMask();
6772   SDValue Data  = MST->getValue();
6773   EVT VT = Data.getValueType();
6774   SDLoc DL(N);
6775 
6776   // If the MSTORE data type requires splitting and the mask is provided by a
6777   // SETCC, then split both nodes and its operands before legalization. This
6778   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6779   // and enables future optimizations (e.g. min/max pattern matching on X86).
6780   if (Mask.getOpcode() == ISD::SETCC) {
6781     // Check if any splitting is required.
6782     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6783         TargetLowering::TypeSplitVector)
6784       return SDValue();
6785 
6786     SDValue MaskLo, MaskHi, Lo, Hi;
6787     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6788 
6789     SDValue Chain = MST->getChain();
6790     SDValue Ptr   = MST->getBasePtr();
6791 
6792     EVT MemoryVT = MST->getMemoryVT();
6793     unsigned Alignment = MST->getOriginalAlignment();
6794 
6795     // if Alignment is equal to the vector size,
6796     // take the half of it for the second part
6797     unsigned SecondHalfAlignment =
6798       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
6799 
6800     EVT LoMemVT, HiMemVT;
6801     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6802 
6803     SDValue DataLo, DataHi;
6804     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
6805 
6806     MachineMemOperand *MMO = DAG.getMachineFunction().
6807       getMachineMemOperand(MST->getPointerInfo(),
6808                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
6809                            Alignment, MST->getAAInfo(), MST->getRanges());
6810 
6811     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
6812                             MST->isTruncatingStore(),
6813                             MST->isCompressingStore());
6814 
6815     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6816                                      MST->isCompressingStore());
6817     unsigned HiOffset = LoMemVT.getStoreSize();
6818 
6819     MMO = DAG.getMachineFunction().getMachineMemOperand(
6820         MST->getPointerInfo().getWithOffset(HiOffset),
6821         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
6822         MST->getAAInfo(), MST->getRanges());
6823 
6824     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
6825                             MST->isTruncatingStore(),
6826                             MST->isCompressingStore());
6827 
6828     AddToWorklist(Lo.getNode());
6829     AddToWorklist(Hi.getNode());
6830 
6831     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
6832   }
6833   return SDValue();
6834 }
6835 
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  // Only act before type legalization; afterwards the types are final.
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).

  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  // Split the SETCC mask into lo/hi halves.
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  // Split the pass-through value that fills masked-off lanes.
  SDValue Src0 = MGT->getValue();
  SDValue Src0Lo, Src0Hi;
  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  // Split the index vector to match the mask halves.
  SDValue Scale = MGT->getScale();
  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  // NOTE(review): a single MMO (sized from LoMemVT) is shared by both half
  // gathers -- presumably acceptable for gathered loads; confirm.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                          Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                           MMO);

  SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                           MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  // Concatenate the halves to rebuild the full-width gather result.
  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}
6912 
6913 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
6914   if (Level >= AfterLegalizeTypes)
6915     return SDValue();
6916 
6917   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
6918   SDValue Mask = MLD->getMask();
6919   SDLoc DL(N);
6920 
6921   // If the MLOAD result requires splitting and the mask is provided by a
6922   // SETCC, then split both nodes and its operands before legalization. This
6923   // prevents the type legalizer from unrolling SETCC into scalar comparisons
6924   // and enables future optimizations (e.g. min/max pattern matching on X86).
6925   if (Mask.getOpcode() == ISD::SETCC) {
6926     EVT VT = N->getValueType(0);
6927 
6928     // Check if any splitting is required.
6929     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
6930         TargetLowering::TypeSplitVector)
6931       return SDValue();
6932 
6933     SDValue MaskLo, MaskHi, Lo, Hi;
6934     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
6935 
6936     SDValue Src0 = MLD->getSrc0();
6937     SDValue Src0Lo, Src0Hi;
6938     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
6939 
6940     EVT LoVT, HiVT;
6941     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
6942 
6943     SDValue Chain = MLD->getChain();
6944     SDValue Ptr   = MLD->getBasePtr();
6945     EVT MemoryVT = MLD->getMemoryVT();
6946     unsigned Alignment = MLD->getOriginalAlignment();
6947 
6948     // if Alignment is equal to the vector size,
6949     // take the half of it for the second part
6950     unsigned SecondHalfAlignment =
6951       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
6952          Alignment/2 : Alignment;
6953 
6954     EVT LoMemVT, HiMemVT;
6955     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
6956 
6957     MachineMemOperand *MMO = DAG.getMachineFunction().
6958     getMachineMemOperand(MLD->getPointerInfo(),
6959                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
6960                          Alignment, MLD->getAAInfo(), MLD->getRanges());
6961 
6962     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
6963                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6964 
6965     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
6966                                      MLD->isExpandingLoad());
6967     unsigned HiOffset = LoMemVT.getStoreSize();
6968 
6969     MMO = DAG.getMachineFunction().getMachineMemOperand(
6970         MLD->getPointerInfo().getWithOffset(HiOffset),
6971         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
6972         MLD->getAAInfo(), MLD->getRanges());
6973 
6974     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
6975                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
6976 
6977     AddToWorklist(Lo.getNode());
6978     AddToWorklist(Hi.getNode());
6979 
6980     // Build a factor node to remember that this load is independent of the
6981     // other one.
6982     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
6983                         Hi.getValue(1));
6984 
6985     // Legalized the chain result - switch anything that used the old chain to
6986     // use the new one.
6987     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
6988 
6989     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6990 
6991     SDValue RetOps[] = { LoadRes, Chain };
6992     return DAG.getMergeValues(RetOps, DL);
6993   }
6994   return SDValue();
6995 }
6996 
6997 /// A vector select of 2 constant vectors can be simplified to math/logic to
6998 /// avoid a variable select instruction and possibly avoid constant loads.
6999 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7000   SDValue Cond = N->getOperand(0);
7001   SDValue N1 = N->getOperand(1);
7002   SDValue N2 = N->getOperand(2);
7003   EVT VT = N->getValueType(0);
7004   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7005       !TLI.convertSelectOfConstantsToMath(VT) ||
7006       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7007       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7008     return SDValue();
7009 
7010   // Check if we can use the condition value to increment/decrement a single
7011   // constant value. This simplifies a select to an add and removes a constant
7012   // load/materialization from the general case.
7013   bool AllAddOne = true;
7014   bool AllSubOne = true;
7015   unsigned Elts = VT.getVectorNumElements();
7016   for (unsigned i = 0; i != Elts; ++i) {
7017     SDValue N1Elt = N1.getOperand(i);
7018     SDValue N2Elt = N2.getOperand(i);
7019     if (N1Elt.isUndef() || N2Elt.isUndef())
7020       continue;
7021 
7022     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7023     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7024     if (C1 != C2 + 1)
7025       AllAddOne = false;
7026     if (C1 != C2 - 1)
7027       AllSubOne = false;
7028   }
7029 
7030   // Further simplifications for the extra-special cases where the constants are
7031   // all 0 or all -1 should be implemented as folds of these patterns.
7032   SDLoc DL(N);
7033   if (AllAddOne || AllSubOne) {
7034     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7035     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7036     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7037     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7038     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7039   }
7040 
7041   // The general case for select-of-constants:
7042   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7043   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7044   // leave that to a machine-specific pass.
7045   return SDValue();
7046 }
7047 
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);  // condition vector
  SDValue N1 = N->getOperand(1);  // true value
  SDValue N2 = N->getOperand(2);  // false value
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // "X >= 0 ? X : -X" (or "X > -1 ? X : -X"): N2 must be (sub 0, X).
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // "X <= 0 ? -X : X": N1 must be (sub 0, X).
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer a native ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand: Y = sra(X, bits-1); abs = xor(add(X, Y), Y).
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
7117 
7118 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
7119   SDValue N0 = N->getOperand(0);
7120   SDValue N1 = N->getOperand(1);
7121   SDValue N2 = N->getOperand(2);
7122   SDValue N3 = N->getOperand(3);
7123   SDValue N4 = N->getOperand(4);
7124   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
7125 
7126   // fold select_cc lhs, rhs, x, x, cc -> x
7127   if (N2 == N3)
7128     return N2;
7129 
7130   // Determine if the condition we're dealing with is constant
7131   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
7132                                   CC, SDLoc(N), false)) {
7133     AddToWorklist(SCC.getNode());
7134 
7135     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
7136       if (!SCCC->isNullValue())
7137         return N2;    // cond always true -> true val
7138       else
7139         return N3;    // cond always false -> false val
7140     } else if (SCC->isUndef()) {
7141       // When the condition is UNDEF, just return the first operand. This is
7142       // coherent the DAG creation, no setcc node is created in this case
7143       return N2;
7144     } else if (SCC.getOpcode() == ISD::SETCC) {
7145       // Fold to a simpler select_cc
7146       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
7147                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
7148                          SCC.getOperand(2));
7149     }
7150   }
7151 
7152   // If we can fold this based on the true/false value, do so.
7153   if (SimplifySelectOps(N, N2, N3))
7154     return SDValue(N, 0);  // Don't revisit N.
7155 
7156   // fold select_cc into other things, such as min/max/abs
7157   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
7158 }
7159 
7160 SDValue DAGCombiner::visitSETCC(SDNode *N) {
7161   // setcc is very commonly used as an argument to brcond. This pattern
7162   // also lend itself to numerous combines and, as a result, it is desired
7163   // we keep the argument to a brcond as a setcc as much as possible.
7164   bool PreferSetCC =
7165       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7166 
7167   SDValue Combined = SimplifySetCC(
7168       N->getValueType(0), N->getOperand(0), N->getOperand(1),
7169       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7170 
7171   if (!Combined)
7172     return SDValue();
7173 
7174   // If we prefer to have a setcc, and we don't, we'll try our best to
7175   // recreate one using rebuildSetCC.
7176   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7177     SDValue NewSetCC = rebuildSetCC(Combined);
7178 
7179     // We don't have anything interesting to combine to.
7180     if (NewSetCC.getNode() == N)
7181       return SDValue();
7182 
7183     if (NewSetCC)
7184       return NewSetCC;
7185   }
7186 
7187   return Combined;
7188 }
7189 
7190 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
7191   SDValue LHS = N->getOperand(0);
7192   SDValue RHS = N->getOperand(1);
7193   SDValue Carry = N->getOperand(2);
7194   SDValue Cond = N->getOperand(3);
7195 
7196   // If Carry is false, fold to a regular SETCC.
7197   if (Carry.getOpcode() == ISD::CARRY_FALSE)
7198     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7199 
7200   return SDValue();
7201 }
7202 
7203 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7204   SDValue LHS = N->getOperand(0);
7205   SDValue RHS = N->getOperand(1);
7206   SDValue Carry = N->getOperand(2);
7207   SDValue Cond = N->getOperand(3);
7208 
7209   // If Carry is false, fold to a regular SETCC.
7210   if (isNullConstant(Carry))
7211     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7212 
7213   return SDValue();
7214 }
7215 
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  // Bail out unless the operand is an all-constant build_vector and the
  // result scalar type is usable at this legalization stage.
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    if (Op->isUndef()) {
      // Undef lanes stay undef in the extended vector.
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    // Note: this inner DL deliberately shadows the outer DL(N) so each
    // extended constant carries its own operand's debug location.
    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  // The build_vector itself uses the outer DL(N).
  return DAG.getBuildVector(VT, DL, Elts).getNode();
}
7275 
7276 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7277 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7278 // transformation. Returns true if extension are possible and the above
7279 // mentioned transformation is profitable.
7280 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
7281                                     unsigned ExtOpc,
7282                                     SmallVectorImpl<SDNode *> &ExtendNodes,
7283                                     const TargetLowering &TLI) {
7284   bool HasCopyToRegUses = false;
7285   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
7286   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7287                             UE = N0.getNode()->use_end();
7288        UI != UE; ++UI) {
7289     SDNode *User = *UI;
7290     if (User == N)
7291       continue;
7292     if (UI.getUse().getResNo() != N0.getResNo())
7293       continue;
7294     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7295     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7296       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7297       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7298         // Sign bits will be lost after a zext.
7299         return false;
7300       bool Add = false;
7301       for (unsigned i = 0; i != 2; ++i) {
7302         SDValue UseOp = User->getOperand(i);
7303         if (UseOp == N0)
7304           continue;
7305         if (!isa<ConstantSDNode>(UseOp))
7306           return false;
7307         Add = true;
7308       }
7309       if (Add)
7310         ExtendNodes.push_back(User);
7311       continue;
7312     }
7313     // If truncates aren't free and there are users we can't
7314     // extend, it isn't worthwhile.
7315     if (!isTruncFree)
7316       return false;
7317     // Remember if this value is live-out.
7318     if (User->getOpcode() == ISD::CopyToReg)
7319       HasCopyToRegUses = true;
7320   }
7321 
7322   if (HasCopyToRegUses) {
7323     bool BothLiveOut = false;
7324     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
7325          UI != UE; ++UI) {
7326       SDUse &Use = UI.getUse();
7327       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
7328         BothLiveOut = true;
7329         break;
7330       }
7331     }
7332     if (BothLiveOut)
7333       // Both unextended and extended values are live out. There had better be
7334       // a good reason for the transformation.
7335       return ExtendNodes.size();
7336   }
7337   return true;
7338 }
7339 
7340 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
7341                                   SDValue OrigLoad, SDValue ExtLoad,
7342                                   const SDLoc &DL, ISD::NodeType ExtType) {
7343   // Extend SetCC uses if necessary.
7344   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
7345     SDNode *SetCC = SetCCs[i];
7346     SmallVector<SDValue, 4> Ops;
7347 
7348     for (unsigned j = 0; j != 2; ++j) {
7349       SDValue SOp = SetCC->getOperand(j);
7350       if (SOp == OrigLoad)
7351         Ops.push_back(ExtLoad);
7352       else
7353         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
7354     }
7355 
7356     Ops.push_back(SetCC->getOperand(2));
7357     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7358   }
7359 }
7360 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Only simple (non-extending, unindexed, non-volatile, single-use) loads
  // into power-of-two vectors, and only when the target wants it.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // All other (non-setcc) users of the load must tolerate the extension.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Repeatedly halve until the target supports the extload, or we run out
  // of elements.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extending load per split piece, advancing the pointer by
  // Stride bytes each time.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
7458 
7459 /// If we're narrowing or widening the result of a vector select and the final
7460 /// size is the same size as a setcc (compare) feeding the select, then try to
7461 /// apply the cast operation to the select's operands because matching vector
7462 /// sizes for a select condition and other operands should be more efficient.
7463 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7464   unsigned CastOpcode = Cast->getOpcode();
7465   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7466           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7467           CastOpcode == ISD::FP_ROUND) &&
7468          "Unexpected opcode for vector select narrowing/widening");
7469 
7470   // We only do this transform before legal ops because the pattern may be
7471   // obfuscated by target-specific operations after legalization. Do not create
7472   // an illegal select op, however, because that may be difficult to lower.
7473   EVT VT = Cast->getValueType(0);
7474   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7475     return SDValue();
7476 
7477   SDValue VSel = Cast->getOperand(0);
7478   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7479       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7480     return SDValue();
7481 
7482   // Does the setcc have the same vector size as the casted select?
7483   SDValue SetCC = VSel.getOperand(0);
7484   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7485   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7486     return SDValue();
7487 
7488   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7489   SDValue A = VSel.getOperand(1);
7490   SDValue B = VSel.getOperand(2);
7491   SDValue CastA, CastB;
7492   SDLoc DL(Cast);
7493   if (CastOpcode == ISD::FP_ROUND) {
7494     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7495     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7496     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7497   } else {
7498     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7499     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7500   }
7501   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7502 }
7503 
/// Combine a SIGN_EXTEND node: fold through constants, nested extends,
/// truncates, loads (forming sextloads), logic-of-load patterns, and setcc.
/// Returns the replacement value, SDValue(N, 0) when uses were rewritten via
/// CombineTo, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (sext c) -> c' for constants and constant build_vectors.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    // First resize x to the destination width, then sign-extend in-register
    // from the truncate's narrow type.
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only profitable if the other uses (setcc users)
    // can be extended too; SetCCs collects them for later rewriting.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::SIGN_EXTEND, SetCCs,
                                        TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, DL, ISD::SIGN_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        // Only the chain result of the old load survives; forward it.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        // Other users of the old load value get a truncate of the extload.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0);
    }
  }

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the '&&' below means this only fires before op
  // legalization AND when the logic op is Legal (not Custom) in VT; this
  // matches the parallel code in visitZERO_EXTEND.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the constant to match the widened logic op.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, DL,
                        ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // Folds for sign-extending the result of a compare.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  // fold sext(vselect(setcc, a, b)) -> vselect(setcc, sext a, sext b).
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
7748 
7749 // isTruncateOf - If N is a truncate of some other value, return true, record
7750 // the value being truncated in Op and which of Op's bits are zero/one in Known.
7751 // This function computes KnownBits to avoid a duplicated call to
7752 // computeKnownBits in the caller.
7753 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
7754                          KnownBits &Known) {
7755   if (N->getOpcode() == ISD::TRUNCATE) {
7756     Op = N->getOperand(0);
7757     DAG.computeKnownBits(Op, Known);
7758     return true;
7759   }
7760 
7761   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
7762       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
7763     return false;
7764 
7765   SDValue Op0 = N->getOperand(0);
7766   SDValue Op1 = N->getOperand(1);
7767   assert(Op0.getValueType() == Op1.getValueType());
7768 
7769   if (isNullConstant(Op0))
7770     Op = Op1;
7771   else if (isNullConstant(Op1))
7772     Op = Op0;
7773   else
7774     return false;
7775 
7776   DAG.computeKnownBits(Op, Known);
7777 
7778   if (!(Known.Zero | 1).isAllOnesValue())
7779     return false;
7780 
7781   return true;
7782 }
7783 
/// Combine a ZERO_EXTEND node: fold through constants, nested extends,
/// truncates, masked truncates, loads (forming zextloads), logic-of-load
/// patterns, setcc, and shifted zexts. Returns the replacement value,
/// SDValue(N, 0) when uses were rewritten via CombineTo, or an empty SDValue
/// when no fold applies.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c) -> c' for constants and constant build_vectors.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  KnownBits Known;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    // Bits of Op discarded by the truncate (and not re-created by the zext).
    // If Op is not wider than N0, nothing is truncated.
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    // Otherwise mask after resizing to the destination width.
    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Zero-extend the constant to the destination width.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only profitable if the setcc users can be extended
    // too; SetCCs collects them for later rewriting.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ZERO_EXTEND, SetCCs,
                                        TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());

      ExtendSetCCUses(SetCCs, N0, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        // Only the chain result of the old load survives; forward it.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        // Other users of the old load value get a truncate of the extload.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  // NOTE(review): the '&&' below means this only fires before op
  // legalization AND when the logic op is Legal (not Custom) in VT; this
  // matches the parallel code in visitSIGN_EXTEND.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        // Skip if the AND could already be matched as a zextload on its own:
        // widening here would just duplicate work for the other users.
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Zero-extend the constant to match the widened logic op.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, DL,
                        ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // Folds for zero-extending the result of a compare.
  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // If the setcc already has the target's natural result type, leave it
      // alone entirely.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      // Mask with 1s because the resized setcc may be all-ones per lane.
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = bits the inner zext guarantees to be zero at the top.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  // fold zext(vselect(setcc, a, b)) -> vselect(setcc, zext a, zext b).
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8070 
8071 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
8072   SDValue N0 = N->getOperand(0);
8073   EVT VT = N->getValueType(0);
8074 
8075   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8076                                               LegalOperations))
8077     return SDValue(Res, 0);
8078 
8079   // fold (aext (aext x)) -> (aext x)
8080   // fold (aext (zext x)) -> (zext x)
8081   // fold (aext (sext x)) -> (sext x)
8082   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
8083       N0.getOpcode() == ISD::ZERO_EXTEND ||
8084       N0.getOpcode() == ISD::SIGN_EXTEND)
8085     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8086 
8087   // fold (aext (truncate (load x))) -> (aext (smaller load x))
8088   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
8089   if (N0.getOpcode() == ISD::TRUNCATE) {
8090     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8091       SDNode *oye = N0.getOperand(0).getNode();
8092       if (NarrowLoad.getNode() != N0.getNode()) {
8093         CombineTo(N0.getNode(), NarrowLoad);
8094         // CombineTo deleted the truncate, if needed, but not what's under it.
8095         AddToWorklist(oye);
8096       }
8097       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8098     }
8099   }
8100 
8101   // fold (aext (truncate x))
8102   if (N0.getOpcode() == ISD::TRUNCATE)
8103     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8104 
8105   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
8106   // if the trunc is not free.
8107   if (N0.getOpcode() == ISD::AND &&
8108       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8109       N0.getOperand(1).getOpcode() == ISD::Constant &&
8110       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8111                           N0.getValueType())) {
8112     SDLoc DL(N);
8113     SDValue X = N0.getOperand(0).getOperand(0);
8114     X = DAG.getAnyExtOrTrunc(X, DL, VT);
8115     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8116     Mask = Mask.zext(VT.getSizeInBits());
8117     return DAG.getNode(ISD::AND, DL, VT,
8118                        X, DAG.getConstant(Mask, DL, VT));
8119   }
8120 
8121   // fold (aext (load x)) -> (aext (truncate (extload x)))
8122   // None of the supported targets knows how to perform load and any_ext
8123   // on vectors in one instruction.  We only perform this transformation on
8124   // scalars.
8125   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
8126       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8127       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
8128     bool DoXform = true;
8129     SmallVector<SDNode*, 4> SetCCs;
8130     if (!N0.hasOneUse())
8131       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
8132                                         TLI);
8133     if (DoXform) {
8134       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8135       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
8136                                        LN0->getChain(),
8137                                        LN0->getBasePtr(), N0.getValueType(),
8138                                        LN0->getMemOperand());
8139       ExtendSetCCUses(SetCCs, N0, ExtLoad, SDLoc(N),
8140                       ISD::ANY_EXTEND);
8141       // If the load value is used only by N, replace it via CombineTo N.
8142       bool NoReplaceTrunc = N0.hasOneUse();
8143       CombineTo(N, ExtLoad);
8144       if (NoReplaceTrunc) {
8145         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8146       } else {
8147         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
8148                                     N0.getValueType(), ExtLoad);
8149         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8150       }
8151       return SDValue(N, 0); // Return N so it doesn't get rechecked!
8152     }
8153   }
8154 
8155   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
8156   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
8157   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
8158   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
8159       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
8160     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8161     ISD::LoadExtType ExtType = LN0->getExtensionType();
8162     EVT MemVT = LN0->getMemoryVT();
8163     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
8164       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
8165                                        VT, LN0->getChain(), LN0->getBasePtr(),
8166                                        MemVT, LN0->getMemOperand());
8167       CombineTo(N, ExtLoad);
8168       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8169       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8170     }
8171   }
8172 
8173   if (N0.getOpcode() == ISD::SETCC) {
8174     // For vectors:
8175     // aext(setcc) -> vsetcc
8176     // aext(setcc) -> truncate(vsetcc)
8177     // aext(setcc) -> aext(vsetcc)
8178     // Only do this before legalize for now.
8179     if (VT.isVector() && !LegalOperations) {
8180       EVT N00VT = N0.getOperand(0).getValueType();
8181       if (getSetCCResultType(N00VT) == N0.getValueType())
8182         return SDValue();
8183 
8184       // We know that the # elements of the results is the same as the
8185       // # elements of the compare (and the # elements of the compare result
8186       // for that matter).  Check to see that they are the same size.  If so,
8187       // we know that the element size of the sext'd result matches the
8188       // element size of the compare operands.
8189       if (VT.getSizeInBits() == N00VT.getSizeInBits())
8190         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
8191                              N0.getOperand(1),
8192                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
8193       // If the desired elements are smaller or larger than the source
8194       // elements we can use a matching integer vector type and then
8195       // truncate/any extend
8196       else {
8197         EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8198         SDValue VsetCC =
8199           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
8200                         N0.getOperand(1),
8201                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
8202         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
8203       }
8204     }
8205 
8206     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8207     SDLoc DL(N);
8208     if (SDValue SCC = SimplifySelectCC(
8209             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8210             DAG.getConstant(0, DL, VT),
8211             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8212       return SCC;
8213   }
8214 
8215   return SDValue();
8216 }
8217 
8218 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
8219   unsigned Opcode = N->getOpcode();
8220   SDValue N0 = N->getOperand(0);
8221   SDValue N1 = N->getOperand(1);
8222   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
8223 
8224   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
8225   if (N0.getOpcode() == Opcode &&
8226       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
8227     return N0;
8228 
8229   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
8230       N0.getOperand(0).getOpcode() == Opcode) {
8231     // We have an assert, truncate, assert sandwich. Make one stronger assert
8232     // by asserting on the smallest asserted type to the larger source type.
8233     // This eliminates the later assert:
8234     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
8235     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
8236     SDValue BigA = N0.getOperand(0);
8237     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
8238     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
8239            "Asserting zero/sign-extended bits to a type larger than the "
8240            "truncated destination does not provide information");
8241 
8242     SDLoc DL(N);
8243     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
8244     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
8245     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
8246                                     BigA.getOperand(0), MinAssertVTVal);
8247     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
8248   }
8249 
8250   return SDValue();
8251 }
8252 
/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, and N is a multiple of the narrow type's bit width,
/// transform it to a narrower load from address + N / (num of bits of new
/// type). Also narrow the load if the result is masked with an AND to
/// effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  // ExtType/ExtVT describe the narrowed load we are trying to form; they are
  // refined by the opcode-specific cases below.
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Point N0 at the SRL node itself so the generic SRL handling below
    // applies to it.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    // Prefer the widest type we can load without touching the shifted-out
    // bits; fall back to the bits remaining in the result otherwise.
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC || !AndC->getAPIntValue().isMask())
      return SDValue();

    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // If N0 is (or has become) an SRL of a load by a constant, absorb the shift
  // into the load offset: ShAmt records the right-shift in bits.
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Build the new (byte-offset) address for the narrow load.
  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}
8412 
8413 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
8414   SDValue N0 = N->getOperand(0);
8415   SDValue N1 = N->getOperand(1);
8416   EVT VT = N->getValueType(0);
8417   EVT EVT = cast<VTSDNode>(N1)->getVT();
8418   unsigned VTBits = VT.getScalarSizeInBits();
8419   unsigned EVTBits = EVT.getScalarSizeInBits();
8420 
8421   if (N0.isUndef())
8422     return DAG.getUNDEF(VT);
8423 
8424   // fold (sext_in_reg c1) -> c1
8425   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8426     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
8427 
8428   // If the input is already sign extended, just drop the extension.
8429   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
8430     return N0;
8431 
8432   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
8433   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
8434       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
8435     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8436                        N0.getOperand(0), N1);
8437 
8438   // fold (sext_in_reg (sext x)) -> (sext x)
8439   // fold (sext_in_reg (aext x)) -> (sext x)
8440   // if x is small enough.
8441   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
8442     SDValue N00 = N0.getOperand(0);
8443     if (N00.getScalarValueSizeInBits() <= EVTBits &&
8444         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8445       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8446   }
8447 
8448   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
8449   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
8450        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
8451        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
8452       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
8453     if (!LegalOperations ||
8454         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
8455       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
8456   }
8457 
8458   // fold (sext_in_reg (zext x)) -> (sext x)
8459   // iff we are extending the source sign bit.
8460   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
8461     SDValue N00 = N0.getOperand(0);
8462     if (N00.getScalarValueSizeInBits() == EVTBits &&
8463         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
8464       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
8465   }
8466 
8467   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
8468   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
8469     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
8470 
8471   // fold operands of sext_in_reg based on knowledge that the top bits are not
8472   // demanded.
8473   if (SimplifyDemandedBits(SDValue(N, 0)))
8474     return SDValue(N, 0);
8475 
8476   // fold (sext_in_reg (load x)) -> (smaller sextload x)
8477   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
8478   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8479     return NarrowLoad;
8480 
8481   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
8482   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
8483   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
8484   if (N0.getOpcode() == ISD::SRL) {
8485     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
8486       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
8487         // We can turn this into an SRA iff the input to the SRL is already sign
8488         // extended enough.
8489         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
8490         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
8491           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
8492                              N0.getOperand(0), N0.getOperand(1));
8493       }
8494   }
8495 
8496   // fold (sext_inreg (extload x)) -> (sextload x)
8497   // If sextload is not supported by target, we can only do the combine when
8498   // load has one use. Doing otherwise can block folding the extload with other
8499   // extends that the target does support.
8500   if (ISD::isEXTLoad(N0.getNode()) &&
8501       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8502       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8503       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
8504         N0.hasOneUse()) ||
8505        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8506     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8507     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8508                                      LN0->getChain(),
8509                                      LN0->getBasePtr(), EVT,
8510                                      LN0->getMemOperand());
8511     CombineTo(N, ExtLoad);
8512     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8513     AddToWorklist(ExtLoad.getNode());
8514     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8515   }
8516   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
8517   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
8518       N0.hasOneUse() &&
8519       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
8520       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
8521        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
8522     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8523     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
8524                                      LN0->getChain(),
8525                                      LN0->getBasePtr(), EVT,
8526                                      LN0->getMemOperand());
8527     CombineTo(N, ExtLoad);
8528     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8529     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8530   }
8531 
8532   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
8533   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
8534     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8535                                            N0.getOperand(1), false))
8536       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8537                          BSwap, N1);
8538   }
8539 
8540   return SDValue();
8541 }
8542 
8543 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8544   SDValue N0 = N->getOperand(0);
8545   EVT VT = N->getValueType(0);
8546 
8547   if (N0.isUndef())
8548     return DAG.getUNDEF(VT);
8549 
8550   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8551                                               LegalOperations))
8552     return SDValue(Res, 0);
8553 
8554   return SDValue();
8555 }
8556 
8557 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8558   SDValue N0 = N->getOperand(0);
8559   EVT VT = N->getValueType(0);
8560 
8561   if (N0.isUndef())
8562     return DAG.getUNDEF(VT);
8563 
8564   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8565                                               LegalOperations))
8566     return SDValue(Res, 0);
8567 
8568   return SDValue();
8569 }
8570 
/// Combine a TRUNCATE node, folding the truncate into its operand wherever an
/// equivalent narrower computation exists.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    // Only return the folded constant if it is actually a new node; otherwise
    // we would return N itself and loop.
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Re-view the source vector with SizeRatio-times-more elements of the
    // truncated type; total bit width must be unchanged.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The low sub-element of the wide element holds the truncated value;
      // its position within the wide element depends on endianness.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known;
    DAG.computeKnownBits(Amt, Known);
    unsigned Size = VT.getScalarSizeInBits();
    // The shift amount's active bits fit in log2(Size), so the shift is known
    // to be in range for the narrow type.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th build_vector operand; these are the
      // operands that land in the low (kept) part of each wide element.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan the concat operands, remembering the single non-undef one (if any)
    // and building the per-operand truncated vector types.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The low element of the bitcast scalar is element 0 on little-endian
      // and the last element on big-endian.
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8843 
8844 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
8845   SDValue Elt = N->getOperand(i);
8846   if (Elt.getOpcode() != ISD::MERGE_VALUES)
8847     return Elt.getNode();
8848   return Elt.getOperand(Elt.getResNo()).getNode();
8849 }
8850 
8851 /// build_pair (load, load) -> load
8852 /// if load locations are consecutive.
8853 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
8854   assert(N->getOpcode() == ISD::BUILD_PAIR);
8855 
8856   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
8857   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
8858 
8859   // A BUILD_PAIR is always having the least significant part in elt 0 and the
8860   // most significant part in elt 1. So when combining into one large load, we
8861   // need to consider the endianness.
8862   if (DAG.getDataLayout().isBigEndian())
8863     std::swap(LD1, LD2);
8864 
8865   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
8866       LD1->getAddressSpace() != LD2->getAddressSpace())
8867     return SDValue();
8868   EVT LD1VT = LD1->getValueType(0);
8869   unsigned LD1Bytes = LD1VT.getStoreSize();
8870   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
8871       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
8872     unsigned Align = LD1->getAlignment();
8873     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
8874         VT.getTypeForEVT(*DAG.getContext()));
8875 
8876     if (NewAlign <= Align &&
8877         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
8878       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
8879                          LD1->getPointerInfo(), Align);
8880   }
8881 
8882   return SDValue();
8883 }
8884 
8885 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
8886   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
8887   // and Lo parts; on big-endian machines it doesn't.
8888   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
8889 }
8890 
8891 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
8892                                     const TargetLowering &TLI) {
8893   // If this is not a bitcast to an FP type or if the target doesn't have
8894   // IEEE754-compliant FP logic, we're done.
8895   EVT VT = N->getValueType(0);
8896   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
8897     return SDValue();
8898 
8899   // TODO: Use splat values for the constant-checking below and remove this
8900   // restriction.
8901   SDValue N0 = N->getOperand(0);
8902   EVT SourceVT = N0.getValueType();
8903   if (SourceVT.isVector())
8904     return SDValue();
8905 
8906   unsigned FPOpcode;
8907   APInt SignMask;
8908   switch (N0.getOpcode()) {
8909   case ISD::AND:
8910     FPOpcode = ISD::FABS;
8911     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
8912     break;
8913   case ISD::XOR:
8914     FPOpcode = ISD::FNEG;
8915     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
8916     break;
8917   // TODO: ISD::OR --> ISD::FNABS?
8918   default:
8919     return SDValue();
8920   }
8921 
8922   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
8923   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
8924   SDValue LogicOp0 = N0.getOperand(0);
8925   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8926   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
8927       LogicOp0.getOpcode() == ISD::BITCAST &&
8928       LogicOp0->getOperand(0).getValueType() == VT)
8929     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
8930 
8931   return SDValue();
8932 }
8933 
/// Combine a BITCAST node: fold away double casts, constant-fold bitcasts of
/// constants and constant BUILD_VECTORs, move the cast across a load, and
/// rewrite bitcasts of FP sign-bit operations (fneg/fabs/fcopysign) into
/// integer logic. Returns the replacement value, or an empty SDValue if no
/// combine applies.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // A bitcast of undef is undef of the destination type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getBitcast(VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only rewrite the load if the target says a VT-typed access at the
    // original alignment is both allowed and fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Re-route chain users of the old load to the new one.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 is a pair of doubles; operate on the sign bit of each half
    // via a 64-bit flip mask replicated into both halves with BUILD_PAIR.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // For fabs, the flip mask is the Hi half's current sign bit, so the
        // xor below clears it when set and is a no-op when already clear.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128 case: the flip mask is the sign-bit difference between cst
      // and x, so xor-ing it into cst copies x's sign onto cst.
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Scalar case: keep x's sign bit and cst's remaining bits, then OR.
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // Widen the shuffle mask: each source-element index expands to MaskScale
    // consecutive indices in the destination element size.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the widened mask is not legal as-is, try the commuted form.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
9184 
9185 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
9186   EVT VT = N->getValueType(0);
9187   return CombineConsecutiveLoads(N, VT);
9188 }
9189 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Recursively rewrites the vector constant so its elements have type
/// DstEltVT, routing FP element types through same-width integers, and
/// returns the new BUILD_VECTOR (or SCALAR_TO_VECTOR) node.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getBitcast(DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    // Each group of NumInputsPerOutput source elements merges into one wider
    // destination element.
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      // Accumulate the group most-significant-first; on little-endian that
      // means walking the source elements in reverse order.
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // A group of all-undef inputs produces an undef output element.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Peel off DstBitSize chunks, least significant first.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
9311 
9312 static bool isContractable(SDNode *N) {
9313   SDNodeFlags F = N->getFlags();
9314   return F.hasAllowContract() || F.hasUnsafeAlgebra();
9315 }
9316 
/// Try to perform FMA combining on a given FADD node: fold
/// (fadd (fmul x, y), z) and related patterns (including through FP_EXTEND
/// and nested FMAs) into FMA/FMAD nodes when the target and fast-math flags
/// allow it. Returns the fused node, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is allowed globally via target options, or per-node via flags.
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets form FMAs later, in the machine combiner; defer to them.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }


    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z));
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z));
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0);
        }
      }
    }
  }

  return SDValue();
}
9528 
/// Try to perform FMA combining on a given FSUB node.
///
/// Attempts to fold the subtraction together with a multiply (possibly hidden
/// behind FP_EXTEND and/or FNEG nodes) into a single fused multiply-add
/// (FMA or FMAD), negating operands as required to preserve the subtraction.
/// Returns the fused node, or an empty SDValue when no pattern applies.
/// Additional, more speculative patterns are tried when the target enables
/// aggressive FMA fusion.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is allowed everywhere when the target/flags say so; FMAD implies
  // intermediate rounding is kept, so it is always acceptable.
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // If the target prefers to form FMAs later in the machine combiner, leave
  // the node untouched here.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0);

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1));
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0);
    }
  }

  // fold (fsub (fpext (fneg (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x)), (fpext y), z)
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1)));
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),

                                     N21, N0));
    }


    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1)));
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1)));
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0));
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0));
      }
    }
  }

  // No pattern matched.
  return SDValue();
}
9818 
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
///
/// Only fires when the no-infs flag is set (the intermediate multiply could
/// otherwise produce a NaN from 0 * inf) and when the target has a profitable
/// fused multiply-add. Returns the fused node, or an empty SDValue.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
  // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
  // X is the fadd operand, Y the other multiplicand. Without aggressive
  // fusion, only fire when the fadd has a single use, so the fadd itself
  // becomes dead after the fold.
  auto FuseFADD = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
    }
    return SDValue();
  };

  // Try both commutations of the FMUL.
  if (SDValue FMA = FuseFADD(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0))
    return FMA;

  // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
  // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
  // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
  // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
  // X is the fsub operand, Y the other multiplicand; the +/-1.0 constant may
  // be on either side of the fsub.
  auto FuseFSUB = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
      if (XC0 && XC0->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           Y);
      if (XC0 && XC0->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));

      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
    }
    return SDValue();
  };

  // Try both commutations of the FMUL.
  if (SDValue FMA = FuseFSUB(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0))
    return FMA;

  return SDValue();
}
9908 
9909 static bool isFMulNegTwo(SDValue &N) {
9910   if (N.getOpcode() != ISD::FMUL)
9911     return false;
9912   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
9913     return CFP->isExactlyValue(-2.0);
9914   return false;
9915 }
9916 
/// Combine an FADD node: constant folding, canonicalization, fneg/fsub
/// rewrites, a set of unsafe-math algebraic simplifications, and finally
/// FADD -> FMA fusion. Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // The '== 2' result from isNegatibleForFree means the negation is free
  // (no extra instruction is needed to materialize it).
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);

  // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
  // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
  if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
      (isFMulNegTwo(N1) && N1.hasOneUse())) {
    bool N1IsFMul = isFMulNegTwo(N1);
    SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
  }

  // FIXME: Auto-upgrade the target/function-level option.
  if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
    // fold (fadd A, 0) -> A
    // Only valid without signed zeros: (-0.0) + 0.0 == +0.0, not -0.0.
    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
      if (N1C->isZero())
        return N0;
  }

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
                                     Flags),
                         Flags);

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
10080 
/// Combine an FSUB node: constant folding, fneg-based rewrites, unsafe-math
/// simplifications, and finally FSUB -> FMA fusion. Returns the replacement
/// value or an empty SDValue.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // FIXME: Auto-upgrade the target/function-level option.
  if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
    // (fsub 0, B) -> -B
    // Only valid without signed zeros: 0.0 - 0.0 == +0.0, but -(0.0) == -0.0.
    if (N0CFP && N0CFP->isZero()) {
      // Prefer a free negation of the operand over an explicit FNEG node.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->isZero())
      return N0;

    // (fsub x, x) -> 0.0
    // Unsafe: x may be NaN or Inf, where x - x is NaN, not 0.0.
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, DL, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
10151 
10152 SDValue DAGCombiner::visitFMUL(SDNode *N) {
10153   SDValue N0 = N->getOperand(0);
10154   SDValue N1 = N->getOperand(1);
10155   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10156   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10157   EVT VT = N->getValueType(0);
10158   SDLoc DL(N);
10159   const TargetOptions &Options = DAG.getTarget().Options;
10160   const SDNodeFlags Flags = N->getFlags();
10161 
10162   // fold vector ops
10163   if (VT.isVector()) {
10164     // This just handles C1 * C2 for vectors. Other vector folds are below.
10165     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10166       return FoldedVOp;
10167   }
10168 
10169   // fold (fmul c1, c2) -> c1*c2
10170   if (N0CFP && N1CFP)
10171     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
10172 
10173   // canonicalize constant to RHS
10174   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10175      !isConstantFPBuildVectorOrConstantFP(N1))
10176     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
10177 
10178   // fold (fmul A, 1.0) -> A
10179   if (N1CFP && N1CFP->isExactlyValue(1.0))
10180     return N0;
10181 
10182   if (SDValue NewSel = foldBinOpIntoSelect(N))
10183     return NewSel;
10184 
10185   if (Options.UnsafeFPMath) {
10186     // fold (fmul A, 0) -> 0
10187     if (N1CFP && N1CFP->isZero())
10188       return N1;
10189 
10190     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
10191     if (N0.getOpcode() == ISD::FMUL) {
10192       // Fold scalars or any vector constants (not just splats).
10193       // This fold is done in general by InstCombine, but extra fmul insts
10194       // may have been generated during lowering.
10195       SDValue N00 = N0.getOperand(0);
10196       SDValue N01 = N0.getOperand(1);
10197       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
10198       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
10199       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
10200 
10201       // Check 1: Make sure that the first operand of the inner multiply is NOT
10202       // a constant. Otherwise, we may induce infinite looping.
10203       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
10204         // Check 2: Make sure that the second operand of the inner multiply and
10205         // the second operand of the outer multiply are constants.
10206         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
10207             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
10208           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
10209           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
10210         }
10211       }
10212     }
10213 
10214     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
10215     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
10216     // during an early run of DAGCombiner can prevent folding with fmuls
10217     // inserted during lowering.
10218     if (N0.getOpcode() == ISD::FADD &&
10219         (N0.getOperand(0) == N0.getOperand(1)) &&
10220         N0.hasOneUse()) {
10221       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
10222       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
10223       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
10224     }
10225   }
10226 
10227   // fold (fmul X, 2.0) -> (fadd X, X)
10228   if (N1CFP && N1CFP->isExactlyValue(+2.0))
10229     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
10230 
10231   // fold (fmul X, -1.0) -> (fneg X)
10232   if (N1CFP && N1CFP->isExactlyValue(-1.0))
10233     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10234       return DAG.getNode(ISD::FNEG, DL, VT, N0);
10235 
10236   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
10237   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10238     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10239       // Both can be negated for free, check to see if at least one is cheaper
10240       // negated.
10241       if (LHSNeg == 2 || RHSNeg == 2)
10242         return DAG.getNode(ISD::FMUL, DL, VT,
10243                            GetNegatedExpression(N0, DAG, LegalOperations),
10244                            GetNegatedExpression(N1, DAG, LegalOperations),
10245                            Flags);
10246     }
10247   }
10248 
10249   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
10250   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
10251   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
10252       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
10253       TLI.isOperationLegal(ISD::FABS, VT)) {
10254     SDValue Select = N0, X = N1;
10255     if (Select.getOpcode() != ISD::SELECT)
10256       std::swap(Select, X);
10257 
10258     SDValue Cond = Select.getOperand(0);
10259     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
10260     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
10261 
10262     if (TrueOpnd && FalseOpnd &&
10263         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
10264         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
10265         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
10266       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
10267       switch (CC) {
10268       default: break;
10269       case ISD::SETOLT:
10270       case ISD::SETULT:
10271       case ISD::SETOLE:
10272       case ISD::SETULE:
10273       case ISD::SETLT:
10274       case ISD::SETLE:
10275         std::swap(TrueOpnd, FalseOpnd);
10276         LLVM_FALLTHROUGH;
10277       case ISD::SETOGT:
10278       case ISD::SETUGT:
10279       case ISD::SETOGE:
10280       case ISD::SETUGE:
10281       case ISD::SETGT:
10282       case ISD::SETGE:
10283         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
10284             TLI.isOperationLegal(ISD::FNEG, VT))
10285           return DAG.getNode(ISD::FNEG, DL, VT,
10286                    DAG.getNode(ISD::FABS, DL, VT, X));
10287         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
10288           return DAG.getNode(ISD::FABS, DL, VT, X);
10289 
10290         break;
10291       }
10292     }
10293   }
10294 
10295   // FMUL -> FMA combines:
10296   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
10297     AddToWorklist(Fused.getNode());
10298     return Fused;
10299   }
10300 
10301   return SDValue();
10302 }
10303 
10304 SDValue DAGCombiner::visitFMA(SDNode *N) {
10305   SDValue N0 = N->getOperand(0);
10306   SDValue N1 = N->getOperand(1);
10307   SDValue N2 = N->getOperand(2);
10308   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10309   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10310   EVT VT = N->getValueType(0);
10311   SDLoc DL(N);
10312   const TargetOptions &Options = DAG.getTarget().Options;
10313 
10314   // Constant fold FMA.
10315   if (isa<ConstantFPSDNode>(N0) &&
10316       isa<ConstantFPSDNode>(N1) &&
10317       isa<ConstantFPSDNode>(N2)) {
10318     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
10319   }
10320 
10321   if (Options.UnsafeFPMath) {
10322     if (N0CFP && N0CFP->isZero())
10323       return N2;
10324     if (N1CFP && N1CFP->isZero())
10325       return N2;
10326   }
10327   // TODO: The FMA node should have flags that propagate to these nodes.
10328   if (N0CFP && N0CFP->isExactlyValue(1.0))
10329     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
10330   if (N1CFP && N1CFP->isExactlyValue(1.0))
10331     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
10332 
10333   // Canonicalize (fma c, x, y) -> (fma x, c, y)
10334   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10335      !isConstantFPBuildVectorOrConstantFP(N1))
10336     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
10337 
10338   // TODO: FMA nodes should have flags that propagate to the created nodes.
10339   // For now, create a Flags object for use with all unsafe math transforms.
10340   SDNodeFlags Flags;
10341   Flags.setUnsafeAlgebra(true);
10342 
10343   if (Options.UnsafeFPMath) {
10344     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
10345     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
10346         isConstantFPBuildVectorOrConstantFP(N1) &&
10347         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
10348       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10349                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
10350                                      Flags), Flags);
10351     }
10352 
10353     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
10354     if (N0.getOpcode() == ISD::FMUL &&
10355         isConstantFPBuildVectorOrConstantFP(N1) &&
10356         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10357       return DAG.getNode(ISD::FMA, DL, VT,
10358                          N0.getOperand(0),
10359                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
10360                                      Flags),
10361                          N2);
10362     }
10363   }
10364 
10365   // (fma x, 1, y) -> (fadd x, y)
10366   // (fma x, -1, y) -> (fadd (fneg x), y)
10367   if (N1CFP) {
10368     if (N1CFP->isExactlyValue(1.0))
10369       // TODO: The FMA node should have flags that propagate to this node.
10370       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
10371 
10372     if (N1CFP->isExactlyValue(-1.0) &&
10373         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
10374       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
10375       AddToWorklist(RHSNeg.getNode());
10376       // TODO: The FMA node should have flags that propagate to this node.
10377       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
10378     }
10379 
10380     // fma (fneg x), K, y -> fma x -K, y
10381     if (N0.getOpcode() == ISD::FNEG &&
10382         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
10383          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
10384       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
10385                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
10386     }
10387   }
10388 
10389   if (Options.UnsafeFPMath) {
10390     // (fma x, c, x) -> (fmul x, (c+1))
10391     if (N1CFP && N0 == N2) {
10392       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10393                          DAG.getNode(ISD::FADD, DL, VT, N1,
10394                                      DAG.getConstantFP(1.0, DL, VT), Flags),
10395                          Flags);
10396     }
10397 
10398     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
10399     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
10400       return DAG.getNode(ISD::FMUL, DL, VT, N0,
10401                          DAG.getNode(ISD::FADD, DL, VT, N1,
10402                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
10403                          Flags);
10404     }
10405   }
10406 
10407   return SDValue();
10408 }
10409 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  // The rewrite changes rounding (a/b becomes a*(1/b)), so it requires either
  // global unsafe math or the reciprocal fast-math flag on this node.
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  // NOTE: use_size() counts *all* uses of N1, not just FDIV uses, so this is
  // only a cheap upper-bound screen; the exact count is computed below.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  // CombineTo replaces each eligible divide in the DAG (N itself is among
  // the users, so N is replaced as part of this loop).
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
10473 
10474 SDValue DAGCombiner::visitFDIV(SDNode *N) {
10475   SDValue N0 = N->getOperand(0);
10476   SDValue N1 = N->getOperand(1);
10477   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10478   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10479   EVT VT = N->getValueType(0);
10480   SDLoc DL(N);
10481   const TargetOptions &Options = DAG.getTarget().Options;
10482   SDNodeFlags Flags = N->getFlags();
10483 
10484   // fold vector ops
10485   if (VT.isVector())
10486     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10487       return FoldedVOp;
10488 
10489   // fold (fdiv c1, c2) -> c1/c2
10490   if (N0CFP && N1CFP)
10491     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
10492 
10493   if (SDValue NewSel = foldBinOpIntoSelect(N))
10494     return NewSel;
10495 
10496   if (Options.UnsafeFPMath) {
10497     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
10498     if (N1CFP) {
10499       // Compute the reciprocal 1.0 / c2.
10500       const APFloat &N1APF = N1CFP->getValueAPF();
10501       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
10502       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
10503       // Only do the transform if the reciprocal is a legal fp immediate that
10504       // isn't too nasty (eg NaN, denormal, ...).
10505       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
10506           (!LegalOperations ||
10507            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
10508            // backend)... we should handle this gracefully after Legalize.
10509            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
10510            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
10511            TLI.isFPImmLegal(Recip, VT)))
10512         return DAG.getNode(ISD::FMUL, DL, VT, N0,
10513                            DAG.getConstantFP(Recip, DL, VT), Flags);
10514     }
10515 
10516     // If this FDIV is part of a reciprocal square root, it may be folded
10517     // into a target-specific square root estimate instruction.
10518     if (N1.getOpcode() == ISD::FSQRT) {
10519       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
10520         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10521       }
10522     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
10523                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10524       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10525                                           Flags)) {
10526         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
10527         AddToWorklist(RV.getNode());
10528         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10529       }
10530     } else if (N1.getOpcode() == ISD::FP_ROUND &&
10531                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10532       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
10533                                           Flags)) {
10534         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
10535         AddToWorklist(RV.getNode());
10536         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10537       }
10538     } else if (N1.getOpcode() == ISD::FMUL) {
10539       // Look through an FMUL. Even though this won't remove the FDIV directly,
10540       // it's still worthwhile to get rid of the FSQRT if possible.
10541       SDValue SqrtOp;
10542       SDValue OtherOp;
10543       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
10544         SqrtOp = N1.getOperand(0);
10545         OtherOp = N1.getOperand(1);
10546       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
10547         SqrtOp = N1.getOperand(1);
10548         OtherOp = N1.getOperand(0);
10549       }
10550       if (SqrtOp.getNode()) {
10551         // We found a FSQRT, so try to make this fold:
10552         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
10553         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
10554           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
10555           AddToWorklist(RV.getNode());
10556           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10557         }
10558       }
10559     }
10560 
10561     // Fold into a reciprocal estimate and multiply instead of a real divide.
10562     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
10563       AddToWorklist(RV.getNode());
10564       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
10565     }
10566   }
10567 
10568   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
10569   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
10570     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
10571       // Both can be negated for free, check to see if at least one is cheaper
10572       // negated.
10573       if (LHSNeg == 2 || RHSNeg == 2)
10574         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
10575                            GetNegatedExpression(N0, DAG, LegalOperations),
10576                            GetNegatedExpression(N1, DAG, LegalOperations),
10577                            Flags);
10578     }
10579   }
10580 
10581   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
10582     return CombineRepeatedDivisors;
10583 
10584   return SDValue();
10585 }
10586 
10587 SDValue DAGCombiner::visitFREM(SDNode *N) {
10588   SDValue N0 = N->getOperand(0);
10589   SDValue N1 = N->getOperand(1);
10590   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10591   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10592   EVT VT = N->getValueType(0);
10593 
10594   // fold (frem c1, c2) -> fmod(c1,c2)
10595   if (N0CFP && N1CFP)
10596     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
10597 
10598   if (SDValue NewSel = foldBinOpIntoSelect(N))
10599     return NewSel;
10600 
10601   return SDValue();
10602 }
10603 
10604 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
10605   if (!DAG.getTarget().Options.UnsafeFPMath)
10606     return SDValue();
10607 
10608   SDValue N0 = N->getOperand(0);
10609   if (TLI.isFsqrtCheap(N0, DAG))
10610     return SDValue();
10611 
10612   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
10613   // For now, create a Flags object for use with all unsafe math transforms.
10614   SDNodeFlags Flags;
10615   Flags.setUnsafeAlgebra(true);
10616   return buildSqrtEstimate(N0, Flags);
10617 }
10618 
10619 /// copysign(x, fp_extend(y)) -> copysign(x, y)
10620 /// copysign(x, fp_round(y)) -> copysign(x, y)
10621 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
10622   SDValue N1 = N->getOperand(1);
10623   if ((N1.getOpcode() == ISD::FP_EXTEND ||
10624        N1.getOpcode() == ISD::FP_ROUND)) {
10625     // Do not optimize out type conversion of f128 type yet.
10626     // For some targets like x86_64, configuration is changed to keep one f128
10627     // value in one SSE register, but instruction selection cannot handle
10628     // FCOPYSIGN on SSE registers yet.
10629     EVT N1VT = N1->getValueType(0);
10630     EVT N1Op0VT = N1->getOperand(0).getValueType();
10631     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
10632   }
10633   return false;
10634 }
10635 
10636 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
10637   SDValue N0 = N->getOperand(0);
10638   SDValue N1 = N->getOperand(1);
10639   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10640   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
10641   EVT VT = N->getValueType(0);
10642 
10643   if (N0CFP && N1CFP) // Constant fold
10644     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
10645 
10646   if (N1CFP) {
10647     const APFloat &V = N1CFP->getValueAPF();
10648     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
10649     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
10650     if (!V.isNegative()) {
10651       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
10652         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10653     } else {
10654       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10655         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
10656                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
10657     }
10658   }
10659 
10660   // copysign(fabs(x), y) -> copysign(x, y)
10661   // copysign(fneg(x), y) -> copysign(x, y)
10662   // copysign(copysign(x,z), y) -> copysign(x, y)
10663   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
10664       N0.getOpcode() == ISD::FCOPYSIGN)
10665     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
10666 
10667   // copysign(x, abs(y)) -> abs(x)
10668   if (N1.getOpcode() == ISD::FABS)
10669     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
10670 
10671   // copysign(x, copysign(y,z)) -> copysign(x, z)
10672   if (N1.getOpcode() == ISD::FCOPYSIGN)
10673     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
10674 
10675   // copysign(x, fp_extend(y)) -> copysign(x, y)
10676   // copysign(x, fp_round(y)) -> copysign(x, y)
10677   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
10678     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
10679 
10680   return SDValue();
10681 }
10682 
10683 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
10684   SDValue N0 = N->getOperand(0);
10685   EVT VT = N->getValueType(0);
10686   EVT OpVT = N0.getValueType();
10687 
10688   // fold (sint_to_fp c1) -> c1fp
10689   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10690       // ...but only if the target supports immediate floating-point values
10691       (!LegalOperations ||
10692        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10693     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10694 
10695   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
10696   // but UINT_TO_FP is legal on this target, try to convert.
10697   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
10698       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
10699     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
10700     if (DAG.SignBitIsZero(N0))
10701       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10702   }
10703 
10704   // The next optimizations are desirable only if SELECT_CC can be lowered.
10705   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10706     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10707     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
10708         !VT.isVector() &&
10709         (!LegalOperations ||
10710          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10711       SDLoc DL(N);
10712       SDValue Ops[] =
10713         { N0.getOperand(0), N0.getOperand(1),
10714           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10715           N0.getOperand(2) };
10716       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10717     }
10718 
10719     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
10720     //      (select_cc x, y, 1.0, 0.0,, cc)
10721     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
10722         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
10723         (!LegalOperations ||
10724          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10725       SDLoc DL(N);
10726       SDValue Ops[] =
10727         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
10728           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10729           N0.getOperand(0).getOperand(2) };
10730       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10731     }
10732   }
10733 
10734   return SDValue();
10735 }
10736 
10737 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
10738   SDValue N0 = N->getOperand(0);
10739   EVT VT = N->getValueType(0);
10740   EVT OpVT = N0.getValueType();
10741 
10742   // fold (uint_to_fp c1) -> c1fp
10743   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10744       // ...but only if the target supports immediate floating-point values
10745       (!LegalOperations ||
10746        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
10747     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
10748 
10749   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
10750   // but SINT_TO_FP is legal on this target, try to convert.
10751   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
10752       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
10753     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
10754     if (DAG.SignBitIsZero(N0))
10755       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
10756   }
10757 
10758   // The next optimizations are desirable only if SELECT_CC can be lowered.
10759   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
10760     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
10761     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
10762         (!LegalOperations ||
10763          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
10764       SDLoc DL(N);
10765       SDValue Ops[] =
10766         { N0.getOperand(0), N0.getOperand(1),
10767           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
10768           N0.getOperand(2) };
10769       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
10770     }
10771   }
10772 
10773   return SDValue();
10774 }
10775 
10776 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
10777 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
10778   SDValue N0 = N->getOperand(0);
10779   EVT VT = N->getValueType(0);
10780 
10781   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
10782     return SDValue();
10783 
10784   SDValue Src = N0.getOperand(0);
10785   EVT SrcVT = Src.getValueType();
10786   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
10787   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
10788 
10789   // We can safely assume the conversion won't overflow the output range,
10790   // because (for example) (uint8_t)18293.f is undefined behavior.
10791 
10792   // Since we can assume the conversion won't overflow, our decision as to
10793   // whether the input will fit in the float should depend on the minimum
10794   // of the input range and output range.
10795 
10796   // This means this is also safe for a signed input and unsigned output, since
10797   // a negative input would lead to undefined behavior.
10798   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
10799   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
10800   unsigned ActualSize = std::min(InputSize, OutputSize);
10801   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
10802 
10803   // We can only fold away the float conversion if the input range can be
10804   // represented exactly in the float range.
10805   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
10806     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
10807       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
10808                                                        : ISD::ZERO_EXTEND;
10809       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
10810     }
10811     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
10812       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
10813     return DAG.getBitcast(VT, Src);
10814   }
10815   return SDValue();
10816 }
10817 
10818 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
10819   SDValue N0 = N->getOperand(0);
10820   EVT VT = N->getValueType(0);
10821 
10822   // fold (fp_to_sint c1fp) -> c1
10823   if (isConstantFPBuildVectorOrConstantFP(N0))
10824     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
10825 
10826   return FoldIntToFPToInt(N, DAG);
10827 }
10828 
10829 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
10830   SDValue N0 = N->getOperand(0);
10831   EVT VT = N->getValueType(0);
10832 
10833   // fold (fp_to_uint c1fp) -> c1
10834   if (isConstantFPBuildVectorOrConstantFP(N0))
10835     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
10836 
10837   return FoldIntToFPToInt(N, DAG);
10838 }
10839 
/// Try to simplify an ISD::FP_ROUND node (narrow an FP value; operand 1 is a
/// flag that is 1 when the round is known to be value-preserving).
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The merged round is value-preserving only when both inputs were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only the magnitude is safe: the sign operand is unaffected.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10895 
10896 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
10897   SDValue N0 = N->getOperand(0);
10898   EVT VT = N->getValueType(0);
10899   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10900   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10901 
10902   // fold (fp_round_inreg c1fp) -> c1fp
10903   if (N0CFP && isTypeLegal(EVT)) {
10904     SDLoc DL(N);
10905     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
10906     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
10907   }
10908 
10909   return SDValue();
10910 }
10911 
/// Try to simplify an ISD::FP_EXTEND node (widen an FP value).
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    // Depending on how X's type compares to VT, this is either a no-op,
    // a narrower round, or a plain extend of X.
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Also replace the original load so that any other users of its value see
    // a value-preserving round of the extending load (flag 1 = truncating),
    // and its chain users see the new load's chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
10964 
10965 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
10966   SDValue N0 = N->getOperand(0);
10967   EVT VT = N->getValueType(0);
10968 
10969   // fold (fceil c1) -> fceil(c1)
10970   if (isConstantFPBuildVectorOrConstantFP(N0))
10971     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
10972 
10973   return SDValue();
10974 }
10975 
10976 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
10977   SDValue N0 = N->getOperand(0);
10978   EVT VT = N->getValueType(0);
10979 
10980   // fold (ftrunc c1) -> ftrunc(c1)
10981   if (isConstantFPBuildVectorOrConstantFP(N0))
10982     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
10983 
10984   // fold ftrunc (known rounded int x) -> x
10985   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
10986   // likely to be generated to extract integer from a rounded floating value.
10987   switch (N0.getOpcode()) {
10988   default: break;
10989   case ISD::FRINT:
10990   case ISD::FTRUNC:
10991   case ISD::FNEARBYINT:
10992   case ISD::FFLOOR:
10993   case ISD::FCEIL:
10994     return N0;
10995   }
10996 
10997   return SDValue();
10998 }
10999 
11000 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
11001   SDValue N0 = N->getOperand(0);
11002   EVT VT = N->getValueType(0);
11003 
11004   // fold (ffloor c1) -> ffloor(c1)
11005   if (isConstantFPBuildVectorOrConstantFP(N0))
11006     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
11007 
11008   return SDValue();
11009 }
11010 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an FNEG node; returns the replacement value or a null SDValue if
/// no fold applies.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the negation can be absorbed into N0's own expression for free, emit
  // the pre-negated expression instead of an explicit FNEG.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only a scalar integer source is handled. (The FP side may still be a
    // vector bitcast from a scalar integer; that case splats the mask.)
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      // Flip the sign bit(s) in the integer domain, then cast back to FP.
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only if the negated constant can be
      // cheaply materialized (legal FP immediate or legal ConstantFP).
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
11069 
11070 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
11071   SDValue N0 = N->getOperand(0);
11072   SDValue N1 = N->getOperand(1);
11073   EVT VT = N->getValueType(0);
11074   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11075   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11076 
11077   if (N0CFP && N1CFP) {
11078     const APFloat &C0 = N0CFP->getValueAPF();
11079     const APFloat &C1 = N1CFP->getValueAPF();
11080     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
11081   }
11082 
11083   // Canonicalize to constant on RHS.
11084   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11085      !isConstantFPBuildVectorOrConstantFP(N1))
11086     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
11087 
11088   return SDValue();
11089 }
11090 
11091 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
11092   SDValue N0 = N->getOperand(0);
11093   SDValue N1 = N->getOperand(1);
11094   EVT VT = N->getValueType(0);
11095   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11096   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11097 
11098   if (N0CFP && N1CFP) {
11099     const APFloat &C0 = N0CFP->getValueAPF();
11100     const APFloat &C1 = N1CFP->getValueAPF();
11101     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
11102   }
11103 
11104   // Canonicalize to constant on RHS.
11105   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11106      !isConstantFPBuildVectorOrConstantFP(N1))
11107     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
11108 
11109   return SDValue();
11110 }
11111 
/// Combine an FABS node; returns the replacement value or a null SDValue if
/// no fold applies.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  // Re-emitting with a constant operand lets getNode constant-fold it.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  // Both inner ops only change the sign, which fabs discards anyway.
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only a scalar integer source is handled. (The FP side may still be a
    // vector bitcast from a scalar integer; that case splats the mask.)
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      // Clear the sign bit(s) in the integer domain, then cast back to FP.
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}
11157 
11158 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
11159   SDValue Chain = N->getOperand(0);
11160   SDValue N1 = N->getOperand(1);
11161   SDValue N2 = N->getOperand(2);
11162 
11163   // If N is a constant we could fold this into a fallthrough or unconditional
11164   // branch. However that doesn't happen very often in normal code, because
11165   // Instcombine/SimplifyCFG should have handled the available opportunities.
11166   // If we did this folding here, it would be necessary to update the
11167   // MachineBasicBlock CFG, which is awkward.
11168 
11169   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
11170   // on the target.
11171   if (N1.getOpcode() == ISD::SETCC &&
11172       TLI.isOperationLegalOrCustom(ISD::BR_CC,
11173                                    N1.getOperand(0).getValueType())) {
11174     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11175                        Chain, N1.getOperand(2),
11176                        N1.getOperand(0), N1.getOperand(1), N2);
11177   }
11178 
11179   if (N1.hasOneUse()) {
11180     if (SDValue NewN1 = rebuildSetCC(N1))
11181       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
11182   }
11183 
11184   return SDValue();
11185 }
11186 
11187 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
11188   if (N.getOpcode() == ISD::SRL ||
11189       (N.getOpcode() == ISD::TRUNCATE &&
11190        (N.getOperand(0).hasOneUse() &&
11191         N.getOperand(0).getOpcode() == ISD::SRL))) {
11192     // Look pass the truncate.
11193     if (N.getOpcode() == ISD::TRUNCATE)
11194       N = N.getOperand(0);
11195 
11196     // Match this pattern so that we can generate simpler code:
11197     //
11198     //   %a = ...
11199     //   %b = and i32 %a, 2
11200     //   %c = srl i32 %b, 1
11201     //   brcond i32 %c ...
11202     //
11203     // into
11204     //
11205     //   %a = ...
11206     //   %b = and i32 %a, 2
11207     //   %c = setcc eq %b, 0
11208     //   brcond %c ...
11209     //
11210     // This applies only when the AND constant value has one bit set and the
11211     // SRL constant is equal to the log2 of the AND constant. The back-end is
11212     // smart enough to convert the result into a TEST/JMP sequence.
11213     SDValue Op0 = N.getOperand(0);
11214     SDValue Op1 = N.getOperand(1);
11215 
11216     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
11217       SDValue AndOp1 = Op0.getOperand(1);
11218 
11219       if (AndOp1.getOpcode() == ISD::Constant) {
11220         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
11221 
11222         if (AndConst.isPowerOf2() &&
11223             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
11224           SDLoc DL(N);
11225           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
11226                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
11227                               ISD::SETNE);
11228         }
11229       }
11230     }
11231   }
11232 
11233   // Transform br(xor(x, y)) -> br(x != y)
11234   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
11235   if (N.getOpcode() == ISD::XOR) {
11236     SDNode *TheXor = N.getNode();
11237 
11238     // Avoid missing important xor optimizations.
11239     while (SDValue Tmp = visitXOR(TheXor)) {
11240       // We don't have a XOR anymore, bail.
11241       if (Tmp.getOpcode() != ISD::XOR)
11242         return Tmp;
11243 
11244       TheXor = Tmp.getNode();
11245     }
11246 
11247     SDValue Op0 = TheXor->getOperand(0);
11248     SDValue Op1 = TheXor->getOperand(1);
11249 
11250     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
11251       bool Equal = false;
11252       if (isOneConstant(Op0) && Op0.hasOneUse() &&
11253           Op0.getOpcode() == ISD::XOR) {
11254         TheXor = Op0.getNode();
11255         Equal = true;
11256       }
11257 
11258       EVT SetCCVT = N.getValueType();
11259       if (LegalTypes)
11260         SetCCVT = getSetCCResultType(SetCCVT);
11261       // Replace the uses of XOR with SETCC
11262       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
11263                           Equal ? ISD::SETEQ : ISD::SETNE);
11264     }
11265   }
11266 
11267   return SDValue();
11268 }
11269 
11270 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
11271 //
11272 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
11273   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
11274   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
11275 
11276   // If N is a constant we could fold this into a fallthrough or unconditional
11277   // branch. However that doesn't happen very often in normal code, because
11278   // Instcombine/SimplifyCFG should have handled the available opportunities.
11279   // If we did this folding here, it would be necessary to update the
11280   // MachineBasicBlock CFG, which is awkward.
11281 
11282   // Use SimplifySetCC to simplify SETCC's.
11283   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
11284                                CondLHS, CondRHS, CC->get(), SDLoc(N),
11285                                false);
11286   if (Simp.getNode()) AddToWorklist(Simp.getNode());
11287 
11288   // fold to a simpler setcc
11289   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
11290     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11291                        N->getOperand(0), Simp.getOperand(2),
11292                        Simp.getOperand(0), Simp.getOperand(1),
11293                        N->getOperand(4));
11294 
11295   return SDValue();
11296 }
11297 
11298 /// Return true if 'Use' is a load or a store that uses N as its base pointer
11299 /// and that N may be folded in the load / store addressing mode.
11300 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
11301                                     SelectionDAG &DAG,
11302                                     const TargetLowering &TLI) {
11303   EVT VT;
11304   unsigned AS;
11305 
11306   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
11307     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
11308       return false;
11309     VT = LD->getMemoryVT();
11310     AS = LD->getAddressSpace();
11311   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
11312     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
11313       return false;
11314     VT = ST->getMemoryVT();
11315     AS = ST->getAddressSpace();
11316   } else
11317     return false;
11318 
11319   TargetLowering::AddrMode AM;
11320   if (N->getOpcode() == ISD::ADD) {
11321     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11322     if (Offset)
11323       // [reg +/- imm]
11324       AM.BaseOffs = Offset->getSExtValue();
11325     else
11326       // [reg +/- reg]
11327       AM.Scale = 1;
11328   } else if (N->getOpcode() == ISD::SUB) {
11329     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11330     if (Offset)
11331       // [reg +/- imm]
11332       AM.BaseOffs = -Offset->getSExtValue();
11333     else
11334       // [reg +/- reg]
11335       AM.Scale = 1;
11336   } else
11337     return false;
11338 
11339   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
11340                                    VT.getTypeForEVT(*DAG.getContext()), AS);
11341 }
11342 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
/// \returns true if \p N was replaced (and deleted); false if unchanged.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only selectable after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  // Determine whether N is a load or store, grab its base pointer, and make
  // sure the target supports a pre-inc or pre-dec form for this memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // Seeding the worklist with N means each query below asks whether the
  // queried node is a predecessor of N (the cycle checks #3 above).
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Skip users that are predecessors of N; rewriting those would create
      // a cycle.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Every rewritable use must be an add/sub of a constant of the same
      // type as Offset; otherwise give up on rewriting any of them.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the original base/offset order for the checks below.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // Build the indexed node and transfer all value/chain uses of N to it.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    // Indexed load: value is result 0, updated base is result 1, chain is
    // result 2 (the old load's chain was result 1).
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    // Indexed store: updated base is result 0, chain is result 1.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Work in the swapped (constant-base) order again while rewriting the
  // constant add/sub uses collected above.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs: SUB negates whichever operand sits on its RHS; PRE_DEC negates
    // the indexed node's offset (or base, if the swap above moved it).
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
11571 
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
/// \returns true if \p N was replaced (and deleted); false if unchanged.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only selectable after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  // Determine whether N is a load or store, grab its base pointer, and make
  // sure the target supports a post-inc or post-dec form for this memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The pointer needs some use besides N itself to supply the increment.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan the pointer's other uses for an add/sub we can fold into an indexed
  // addressing mode.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Post-inc'ing a frame index or register base gains nothing; see the
      // analogous check in CombineToPreIndexedLoadStore.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          // Indexed load: value is result 0, updated base is result 1,
          // chain is result 2 (the old load's chain was result 1).
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          // Indexed store: updated base is result 0, chain is result 1.
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
11691 
11692 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
11693 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
11694   ISD::MemIndexedMode AM = LD->getAddressingMode();
11695   assert(AM != ISD::UNINDEXED);
11696   SDValue BP = LD->getOperand(1);
11697   SDValue Inc = LD->getOperand(2);
11698 
11699   // Some backends use TargetConstants for load offsets, but don't expect
11700   // TargetConstants in general ADD nodes. We can convert these constants into
11701   // regular Constants (if the constant is not opaque).
11702   assert((Inc.getOpcode() != ISD::TargetConstant ||
11703           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
11704          "Cannot split out indexing using opaque target constants");
11705   if (Inc.getOpcode() == ISD::TargetConstant) {
11706     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
11707     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
11708                           ConstInc->getValueType(0));
11709   }
11710 
11711   unsigned Opc =
11712       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
11713   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
11714 }
11715 
/// Combine a LOAD node: delete dead loads, forward a directly preceding
/// store's value, infer better alignment, re-chain past non-aliasing memory
/// operations, and try indexed-load and load-slicing transformations.
/// Returns the replacement value, or an empty SDValue if no combine applied.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load value is dead. If the incremented pointer is also dead (or we
      // may split the indexing into standalone pointer arithmetic), the whole
      // node can be replaced.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Forward only on an exact pointer and type match (see TODOs above).
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // getExtLoad may CSE to the existing node; only combine if a new node
        // was actually created.
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
11851 
11852 namespace {
11853 
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;

    /// Various cost.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    /// \brief Build an empty cost model for a slicing configuration.
    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}

    /// \brief Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A slice only costs an extra zext when the loaded type differs from
      // the truncated type and the target cannot extend for free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    /// \brief Accumulate the cost of \p RHS into this cost, component-wise.
    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    /// \brief Order costs: expensive operations (loads and cross register
    /// bank copies) dominate, unless optimizing for code size where every
    /// operation counts equally.
    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // The original address is Alignment-aligned; derive the alignment of
    // that address plus Offset.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the least-significant bits live at the highest
    // address, so mirror the offset within the original type.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    // The slice must feed exactly one bitcast.
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
12157 
12158 } // end anonymous namespace
12159 
12160 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
12161 /// \p UsedBits looks like 0..0 1..1 0..0.
12162 static bool areUsedBitsDense(const APInt &UsedBits) {
12163   // If all the bits are one, this is dense!
12164   if (UsedBits.isAllOnesValue())
12165     return true;
12166 
12167   // Get rid of the unused bits on the right.
12168   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
12169   // Get rid of the unused bits on the left.
12170   if (NarrowedUsedBits.countLeadingZeros())
12171     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
12172   // Check that the chunk of bits is completely used.
12173   return NarrowedUsedBits.isAllOnesValue();
12174 }
12175 
12176 /// \brief Check whether or not \p First and \p Second are next to each other
12177 /// in memory. This means that there is no hole between the bits loaded
12178 /// by \p First and the bits loaded by \p Second.
12179 static bool areSlicesNextToEachOther(const LoadedSlice &First,
12180                                      const LoadedSlice &Second) {
12181   assert(First.Origin == Second.Origin && First.Origin &&
12182          "Unable to match different memory origins.");
12183   APInt UsedBits = First.getUsedBits();
12184   assert((UsedBits & Second.getUsedBits()) == 0 &&
12185          "Slices are not supposed to overlap.");
12186   UsedBits |= Second.getUsedBits();
12187   return areUsedBitsDense(UsedBits);
12188 }
12189 
/// \brief Adjust the \p GlobalLSCost according to the target
/// paring capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Walk adjacent slices pairwise. Note that the for-increment also promotes
  // Second to First, so each iteration considers (previous, current); when a
  // pair is consumed (or a type is hopeless), Second is reset to nullptr so
  // the next slice starts a new pair.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A paired load replaces two loads, so one load is saved.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
12250 
12251 /// \brief Check the profitability of all involved LoadedSlice.
12252 /// Currently, it is considered profitable if there is exactly two
12253 /// involved slices (1) which are (2) next to each other in memory, and
12254 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
12255 ///
12256 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
12257 /// the elements themselves.
12258 ///
12259 /// FIXME: When the cost model will be mature enough, we can relax
12260 /// constraints (1) and (2).
12261 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
12262                                 const APInt &UsedBits, bool ForCodeSize) {
12263   unsigned NumberOfSlices = LoadedSlices.size();
12264   if (StressLoadSlicing)
12265     return NumberOfSlices > 1;
12266 
12267   // Check (1).
12268   if (NumberOfSlices != 2)
12269     return false;
12270 
12271   // Check (2).
12272   if (!areUsedBitsDense(UsedBits))
12273     return false;
12274 
12275   // Check (3).
12276   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
12277   // The original code has one big load.
12278   OrigCost.Loads = 1;
12279   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
12280     const LoadedSlice &LS = LoadedSlices[CurrSlice];
12281     // Accumulate the cost of all the slices.
12282     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
12283     GlobalSlicingCost += SliceCost;
12284 
12285     // Account as cost in the original configuration the gain obtained
12286     // with the current slices.
12287     OrigCost.addSliceGain(LS);
12288   }
12289 
12290   // If the target supports paired load, adjust the cost accordingly.
12291   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
12292   return OrigCost > GlobalSlicingCost;
12293 }
12294 
/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
/// \return true if \p N was replaced by sliced loads.
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Only run this late in the pipeline (after DAG legalization).
  if (Level < AfterLegalizeDAG)
    return false;

  // Only plain (non-extending, non-indexed) integer loads are candidates.
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a Truncate, iff we encountered, trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice may have wrapped the load in a zext; step down to the load
    // itself to collect its chain.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie all the new load chains together and replace the original load's
  // chain so downstream users depend on every slice.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
12396 
12397 /// Check to see if V is (and load (ptr), imm), where the load is having
12398 /// specific bytes cleared out.  If so, return the byte size being masked out
12399 /// and the shift amount.
12400 static std::pair<unsigned, unsigned>
12401 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
12402   std::pair<unsigned, unsigned> Result(0, 0);
12403 
12404   // Check for the structure we're looking for.
12405   if (V->getOpcode() != ISD::AND ||
12406       !isa<ConstantSDNode>(V->getOperand(1)) ||
12407       !ISD::isNormalLoad(V->getOperand(0).getNode()))
12408     return Result;
12409 
12410   // Check the chain and pointer.
12411   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
12412   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
12413 
12414   // The store should be chained directly to the load or be an operand of a
12415   // tokenfactor.
12416   if (LD == Chain.getNode())
12417     ; // ok.
12418   else if (Chain->getOpcode() != ISD::TokenFactor)
12419     return Result; // Fail.
12420   else {
12421     bool isOk = false;
12422     for (const SDValue &ChainOp : Chain->op_values())
12423       if (ChainOp.getNode() == LD) {
12424         isOk = true;
12425         break;
12426       }
12427     if (!isOk) return Result;
12428   }
12429 
12430   // This only handles simple types.
12431   if (V.getValueType() != MVT::i16 &&
12432       V.getValueType() != MVT::i32 &&
12433       V.getValueType() != MVT::i64)
12434     return Result;
12435 
12436   // Check the constant mask.  Invert it so that the bits being masked out are
12437   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
12438   // follow the sign bit for uniformity.
12439   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
12440   unsigned NotMaskLZ = countLeadingZeros(NotMask);
12441   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
12442   unsigned NotMaskTZ = countTrailingZeros(NotMask);
12443   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
12444   if (NotMaskLZ == 64) return Result;  // All zero mask.
12445 
12446   // See if we have a continuous run of bits.  If so, we have 0*1+0*
12447   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
12448     return Result;
12449 
12450   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
12451   if (V.getValueType() != MVT::i64 && NotMaskLZ)
12452     NotMaskLZ -= 64-V.getValueSizeInBits();
12453 
12454   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
12455   switch (MaskedBytes) {
12456   case 1:
12457   case 2:
12458   case 4: break;
12459   default: return Result; // All one mask, or 5-byte mask.
12460   }
12461 
12462   // Verify that the first bit starts at a multiple of mask so that the access
12463   // is aligned the same as the access width.
12464   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
12465 
12466   Result.first = MaskedBytes;
12467   Result.second = NotMaskTZ/8;
12468   return Result;
12469 }
12470 
12471 /// Check to see if IVal is something that provides a value as specified by
12472 /// MaskInfo. If so, replace the specified store with a narrower store of
12473 /// truncated IVal.
12474 static SDNode *
12475 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12476                                 SDValue IVal, StoreSDNode *St,
12477                                 DAGCombiner *DC) {
12478   unsigned NumBytes = MaskInfo.first;
12479   unsigned ByteShift = MaskInfo.second;
12480   SelectionDAG &DAG = DC->getDAG();
12481 
12482   // Check to see if IVal is all zeros in the part being masked in by the 'or'
12483   // that uses this.  If not, this is not a replacement.
12484   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12485                                   ByteShift*8, (ByteShift+NumBytes)*8);
12486   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12487 
12488   // Check that it is legal on the target to do this.  It is legal if the new
12489   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12490   // legalization.
12491   MVT VT = MVT::getIntegerVT(NumBytes*8);
12492   if (!DC->isTypeLegal(VT))
12493     return nullptr;
12494 
12495   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
12496   // shifted by ByteShift and truncated down to NumBytes.
12497   if (ByteShift) {
12498     SDLoc DL(IVal);
12499     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12500                        DAG.getConstant(ByteShift*8, DL,
12501                                     DC->getShiftAmountTy(IVal.getValueType())));
12502   }
12503 
12504   // Figure out the offset for the store and the alignment of the access.
12505   unsigned StOffset;
12506   unsigned NewAlign = St->getAlignment();
12507 
12508   if (DAG.getDataLayout().isLittleEndian())
12509     StOffset = ByteShift;
12510   else
12511     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12512 
12513   SDValue Ptr = St->getBasePtr();
12514   if (StOffset) {
12515     SDLoc DL(IVal);
12516     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12517                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
12518     NewAlign = MinAlign(NewAlign, StOffset);
12519   }
12520 
12521   // Truncate down to the new size.
12522   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
12523 
12524   ++OpsNarrowed;
12525   return DAG
12526       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
12527                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
12528       .getNode();
12529 }
12530 
12531 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
12532 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
12533 /// narrowing the load and store if it would end up being a win for performance
12534 /// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  // Volatile stores must not be narrowed.
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain scalar stores whose value has no other users are candidates.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // Beyond the OR special case, only handle (op (load P), imm) where op is
  // OR, XOR, or AND with a constant immediate.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The load must feed only this op, be chained directly to the store, and
  // access the same pointer in the same address space.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // Canonicalize AND by inverting Imm so that, as for OR/XOR, the set bits
    // of Imm are exactly the bits the operation may change.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // A no-op or an all-bits operation cannot be narrowed profitably.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // [ShAmt, MSB] is the bit range the operation actually changes.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // Every changed bit must fit inside the single NewBW-wide chunk starting
    // at ShAmt; otherwise the narrowed op would miss some of them.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      // Extract that chunk of the immediate and undo the AND inversion.
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Bail out if the narrowed access would be under-aligned for NewVT.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrowed load / op / store sequence at the adjusted pointer.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Rewire users of the old load's chain result to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
12656 
12657 /// For a given floating point load / store pair, if the load value isn't used
12658 /// by any other operations, then consider transforming the pair to integer
12659 /// load / store operations if the target deems the transformation profitable.
12660 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
12661   StoreSDNode *ST  = cast<StoreSDNode>(N);
12662   SDValue Chain = ST->getChain();
12663   SDValue Value = ST->getValue();
12664   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
12665       Value.hasOneUse() &&
12666       Chain == SDValue(Value.getNode(), 1)) {
12667     LoadSDNode *LD = cast<LoadSDNode>(Value);
12668     EVT VT = LD->getMemoryVT();
12669     if (!VT.isFloatingPoint() ||
12670         VT != ST->getMemoryVT() ||
12671         LD->isNonTemporal() ||
12672         ST->isNonTemporal() ||
12673         LD->getPointerInfo().getAddrSpace() != 0 ||
12674         ST->getPointerInfo().getAddrSpace() != 0)
12675       return SDValue();
12676 
12677     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
12678     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
12679         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
12680         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
12681         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
12682       return SDValue();
12683 
12684     unsigned LDAlign = LD->getAlignment();
12685     unsigned STAlign = ST->getAlignment();
12686     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
12687     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
12688     if (LDAlign < ABIAlign || STAlign < ABIAlign)
12689       return SDValue();
12690 
12691     SDValue NewLD =
12692         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
12693                     LD->getPointerInfo(), LDAlign);
12694 
12695     SDValue NewST =
12696         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
12697                      ST->getPointerInfo(), STAlign);
12698 
12699     AddToWorklist(NewLD.getNode());
12700     AddToWorklist(NewST.getNode());
12701     WorklistRemover DeadNodes(*this);
12702     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
12703     ++LdStFP2Int;
12704     return NewST;
12705   }
12706 
12707   return SDValue();
12708 }
12709 
12710 // This is a helper function for visitMUL to check the profitability
12711 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
12712 // MulNode is the original multiply, AddNode is (add x, c1),
12713 // and ConstNode is c2.
12714 //
12715 // If the (add x, c1) has multiple uses, we could increase
12716 // the number of adds if we make this transformation.
12717 // It would only be worth doing this if we can remove a
12718 // multiply in the process. Check for that here.
12719 // To illustrate:
12720 //     (A + c1) * c3
12721 //     (A + c2) * c3
12722 // We're checking for cases where we have common "c3 * A" expressions.
12723 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
12724                                               SDValue &AddNode,
12725                                               SDValue &ConstNode) {
12726   APInt Val;
12727 
12728   // If the add only has one use, this would be OK to do.
12729   if (AddNode.getNode()->hasOneUse())
12730     return true;
12731 
12732   // Walk all the users of the constant with which we're multiplying.
12733   for (SDNode *Use : ConstNode->uses()) {
12734     if (Use == MulNode) // This use is the one we're on right now. Skip it.
12735       continue;
12736 
12737     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
12738       SDNode *OtherOp;
12739       SDNode *MulVar = AddNode.getOperand(0).getNode();
12740 
12741       // OtherOp is what we're multiplying against the constant.
12742       if (Use->getOperand(0) == ConstNode)
12743         OtherOp = Use->getOperand(1).getNode();
12744       else
12745         OtherOp = Use->getOperand(0).getNode();
12746 
12747       // Check to see if multiply is with the same operand of our "add".
12748       //
12749       //     ConstNode  = CONST
12750       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
12751       //     ...
12752       //     AddNode  = (A + c1)  <-- MulVar is A.
12753       //         = AddNode * ConstNode   <-- current visiting instruction.
12754       //
12755       // If we make this transformation, we will have a common
12756       // multiply (ConstNode * A) that we can save.
12757       if (OtherOp == MulVar)
12758         return true;
12759 
12760       // Now check to see if a future expansion will give us a common
12761       // multiply.
12762       //
12763       //     ConstNode  = CONST
12764       //     AddNode    = (A + c1)
12765       //     ...   = AddNode * ConstNode <-- current visiting instruction.
12766       //     ...
12767       //     OtherOp = (A + c2)
12768       //     Use     = OtherOp * ConstNode <-- visiting Use.
12769       //
12770       // If we make this transformation, we will have a common
12771       // multiply (CONST * A) after we also do the same transformation
12772       // to the "t2" instruction.
12773       if (OtherOp->getOpcode() == ISD::ADD &&
12774           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
12775           OtherOp->getOperand(0).getNode() == MulVar)
12776         return true;
12777     }
12778   }
12779 
12780   // Didn't find a case where this would be profitable.
12781   return false;
12782 }
12783 
12784 static SDValue peekThroughBitcast(SDValue V) {
12785   while (V.getOpcode() == ISD::BITCAST)
12786     V = V.getOperand(0);
12787   return V;
12788 }
12789 
12790 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
12791                                          unsigned NumStores) {
12792   SmallVector<SDValue, 8> Chains;
12793   SmallPtrSet<const SDNode *, 8> Visited;
12794   SDLoc StoreDL(StoreNodes[0].MemNode);
12795 
12796   for (unsigned i = 0; i < NumStores; ++i) {
12797     Visited.insert(StoreNodes[i].MemNode);
12798   }
12799 
12800   // don't include nodes that are children
12801   for (unsigned i = 0; i < NumStores; ++i) {
12802     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
12803       Chains.push_back(StoreNodes[i].MemNode->getChain());
12804   }
12805 
12806   assert(Chains.size() > 0 && "Chain should have generated a chain");
12807   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
12808 }
12809 
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // Pick the type of the merged store: either a wide vector of the element
  // type, or a single integer covering all of the stored bits.
  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the per-store constants into the operand list of a
      // BUILD_VECTOR / CONCAT_VECTORS node.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcast(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Bitcast the now correctly-sized value to the element memory type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcast(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          SDValue Vec = Val.getOperand(0);
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Vec.getValueType()) {
            unsigned Elts = Vec.getValueType().getSizeInBits() /
                            MemVTScalarTy.getSizeInBits();
            EVT NewVecTy =
                EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
            Vec = DAG.getBitcast(NewVecTy, Vec);
          }
          auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
                                        : ISD::EXTRACT_VECTOR_ELT;
          Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets the first store supplies the lowest bytes,
      // so visit the stores in reverse: each shift pushes earlier values
      // toward the low end of StoreInt.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcast(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Materialize the combined bits as one wide integer constant.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    EVT LegalizedStoredValueTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    // Widen the constant to the legalized type; the trunc store writes back
    // only the original StoreTy-sized bits.
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValueTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
12968 
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcast(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify the stored value; candidates must store the same kind of value.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
  }
  // Returns true if Other is a mergeable candidate: same kind of source,
  // compatible type, and an address sharing St's base. On success, Offset is
  // set to the candidate's byte offset from BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcast(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  SDNode *RootNode = (St->getChain()).getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // The chain points at a load: climb one more level, then look through
    // the root's sibling loads for stores hanging off their chain results.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise scan every chain user of the root for candidate stores.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
13081 
13082 // We need to check that merging these stores does not cause a loop in
13083 // the DAG. Any store candidate may depend on another candidate
13084 // indirectly through its operand (we already consider dependencies
13085 // through the chain). Check in parallel by searching up from
13086 // non-chain operands of candidates.
13087 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13088     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
13089   // FIXME: We should be able to truncate a full search of
13090   // predecessors by doing a BFS and keeping tabs the originating
13091   // stores from which worklist nodes come from in a similar way to
13092   // TokenFactor simplfication.
13093 
13094   SmallPtrSet<const SDNode *, 16> Visited;
13095   SmallVector<const SDNode *, 8> Worklist;
13096   unsigned int Max = 8192;
13097   // Search Ops of store candidates.
13098   for (unsigned i = 0; i < NumStores; ++i) {
13099     SDNode *n = StoreNodes[i].MemNode;
13100     // Potential loops may happen only through non-chain operands
13101     for (unsigned j = 1; j < n->getNumOperands(); ++j)
13102       Worklist.push_back(n->getOperand(j).getNode());
13103   }
13104   // Search through DAG. We can stop early if we find a store node.
13105   for (unsigned i = 0; i < NumStores; ++i) {
13106     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
13107                                      Max))
13108       return false;
13109     // Check if we ended early, failing conservatively if so.
13110     if (Visited.size() >= Max)
13111       return false;
13112   }
13113   return true;
13114 }
13115 
/// Try to replace runs of consecutive stores (including the candidate store
/// \p St) with fewer, wider stores. Three kinds of stored value are handled:
/// constants, elements extracted from vectors, and values produced by
/// consecutive loads (merged into one wide load + one wide store).
/// Returns true if any merge was performed.
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
  // Store merging is only worthwhile when optimizing.
  if (OptLevel == CodeGenOpt::None)
    return false;

  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // A merge needs at least two elements, so bail out if even two of them
  // would exceed the widest store the target supports.
  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
    return false;

  // noimplicitfloat forbids introducing vector (FP-register) stores below.
  bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
      Attribute::NoImplicitFloat);

  // This function cannot currently deal with non-byte-sized memory sizes.
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
    return false;

  if (!MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants, loads, or extracted vector elements.
  SDValue StoredVal = peekThroughBitcast(St->getValue());
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                       isa<ConstantFPSDNode>(StoredVal);
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);

  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  // Find potential store merge candidates by searching through chain sub-DAG
  getStoreMergeCandidates(St, StoreNodes);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  std::sort(StoreNodes.begin(), StoreNodes.end(),
            [](MemOpLink LHS, MemOpLink RHS) {
              return LHS.OffsetFromBase < RHS.OffsetFromBase;
            });

  // Store Merge attempts to merge the lowest stores. This generally
  // works out as if successful, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
  // case that a non-mergeable store is found first, e.g., {p[-2],
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  // mergeable cases. To prevent this, we prune such stores from the
  // front of StoreNodes here.

  bool RV = false;
  while (StoreNodes.size() > 1) {
    // Skip candidates at the front until two adjacent entries are exactly
    // ElementSizeBytes apart (i.e. truly consecutive in memory).
    unsigned StartIdx = 0;
    while ((StartIdx + 1 < StoreNodes.size()) &&
           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
               StoreNodes[StartIdx + 1].OffsetFromBase)
      ++StartIdx;

    // Bail if we don't have enough candidates to merge.
    if (StartIdx + 1 >= StoreNodes.size())
      return RV;

    if (StartIdx)
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);

    // Scan the memory operations on the chain and find the first
    // non-consecutive store memory address.
    unsigned NumConsecutiveStores = 1;
    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      NumConsecutiveStores = i + 1;
    }

    if (NumConsecutiveStores < 2) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // Check that we can merge these candidates without causing a cycle
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
                                                  NumConsecutiveStores)) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // The node with the lowest store address.
    LLVMContext &Context = *DAG.getContext();
    const DataLayout &DL = DAG.getDataLayout();

    // Store the constants into memory as one consecutive store.
    if (IsConstantSrc) {
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      // Best (largest) merge sizes found so far, in number of elements.
      unsigned LastLegalType = 1;
      unsigned LastLegalVectorType = 1;
      bool LastIntegerTrunc = false;
      bool NonZero = false;
      // Index of the first zero-valued element that appears after a non-zero
      // one; bounds how many candidates may be skipped when no merge is found.
      unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
        StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue StoredVal = ST->getValue();
        bool IsElementZero = false;
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
          IsElementZero = C->isNullValue();
        else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
          IsElementZero = C->getConstantFPValue()->isNullValue();
        if (IsElementZero) {
          if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
            FirstZeroAfterNonZero = i;
        }
        NonZero |= !IsElementZero;

        // Find a legal type for the constant store.
        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
        EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
        bool IsFast = false;
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFast) &&
            IsFast) {
          LastIntegerTrunc = false;
          LastLegalType = i + 1;
          // Or check whether a truncstore is legal.
        } else if (TLI.getTypeAction(Context, StoreTy) ==
                   TargetLowering::TypePromoteInteger) {
          EVT LegalizedStoredValueTy =
              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast) {
            LastIntegerTrunc = true;
            LastLegalType = i + 1;
          }
        }

        // We only use vectors if the constant is known to be zero or the target
        // allows it and the function is not marked with the noimplicitfloat
        // attribute.
        if ((!NonZero ||
             TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
            !NoVectors) {
          // Find a legal type for the vector store.
          unsigned Elts = (i + 1) * NumMemElts;
          EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
          if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
              TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast)
            LastLegalVectorType = i + 1;
        }
      }

      // Use a vector type only if it covers strictly more elements than the
      // best integer type found.
      bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
      unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;

      // Check if we found a legal integer type that creates a meaningful merge.
      if (NumElem < 2) {
        // We know that candidate stores are in order and of correct
        // shape. While there is no mergeable sequence from the
        // beginning one may start later in the sequence. The only
        // reason a merge of size N could have failed where another of
        // the same size would not have, is if the alignment has
        // improved or we've dropped a non-zero value. Drop as many
        // candidates as we can here.
        unsigned NumSkip = 1;
        while (
            (NumSkip < NumConsecutiveStores) &&
            (NumSkip < FirstZeroAfterNonZero) &&
            (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
          NumSkip++;
        }
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
        continue;
      }

      bool Merged = MergeStoresOfConstantsOrVecElts(
          StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
      RV |= Merged;

      // Remove merged stores for next iteration.
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      continue;
    }

    // When extracting multiple vector elements, try to store them
    // in one vector store rather than a sequence of scalar stores.
    if (IsExtractVecSrc) {
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      unsigned NumStoresToMerge = 1;
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue StVal = peekThroughBitcast(St->getValue());
        // This restriction could be loosened.
        // Bail out if any stored values are not elements extracted from a
        // vector. It should be possible to handle mixed sources, but load
        // sources need more careful handling (see the block of code below that
        // handles consecutive loads).
        if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
            StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
          return RV;

        // Find a legal type for the vector store.
        unsigned Elts = (i + 1) * NumMemElts;
        EVT Ty =
            EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
        bool IsFast;
        if (TLI.isTypeLegal(Ty) &&
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                   FirstStoreAlign, &IsFast) &&
            IsFast)
          NumStoresToMerge = i + 1;
      }

      // Check if we found a legal integer type that creates a meaningful merge.
      if (NumStoresToMerge < 2) {
        // We know that candidate stores are in order and of correct
        // shape. While there is no mergeable sequence from the
        // beginning one may start later in the sequence. The only
        // reason a merge of size N could have failed where another of
        // the same size would not have, is if the alignment has
        // improved. Drop as many candidates as we can here.
        unsigned NumSkip = 1;
        while ((NumSkip < NumConsecutiveStores) &&
               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
          NumSkip++;

        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
        continue;
      }

      bool Merged = MergeStoresOfConstantsOrVecElts(
          StoreNodes, MemVT, NumStoresToMerge, false, true, false);
      if (!Merged) {
        StoreNodes.erase(StoreNodes.begin(),
                         StoreNodes.begin() + NumStoresToMerge);
        continue;
      }
      // Remove merged stores for next iteration.
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumStoresToMerge);
      RV = true;
      continue;
    }

    // Below we handle the case of multiple consecutive stores that
    // come from multiple consecutive loads. We merge them into a single
    // wide load and a single wide store.

    // Look for load nodes which are used by the stored values.
    SmallVector<MemOpLink, 8> LoadNodes;

    // Find acceptable loads. Loads need to have the same chain (token factor),
    // must not be zext, volatile, indexed, and they must be consecutive.
    BaseIndexOffset LdBasePtr;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue Val = peekThroughBitcast(St->getValue());
      LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
      if (!Ld)
        break;

      // Loads must only have one use.
      if (!Ld->hasNUsesOfValue(1, 0))
        break;

      // The memory operands must not be volatile.
      if (Ld->isVolatile() || Ld->isIndexed())
        break;

      // The stored memory type must be the same.
      if (Ld->getMemoryVT() != MemVT)
        break;

      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
      // If this is not the first ptr that we check.
      int64_t LdOffset = 0;
      if (LdBasePtr.getBase().getNode()) {
        // The base ptr must be the same.
        if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
          break;
      } else {
        // Check that all other base pointers are the same as this one.
        LdBasePtr = LdPtr;
      }

      // We found a potential memory operand to merge.
      LoadNodes.push_back(MemOpLink(Ld, LdOffset));
    }

    if (LoadNodes.size() < 2) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
      continue;
    }

    // If we have load/store pair instructions and we only have two values,
    // don't bother merging.
    // NOTE: RequiredAlignment is only meaningful when hasPairedLoad() returns
    // true; the && short-circuit guarantees it is not read otherwise.
    unsigned RequiredAlignment;
    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
        StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
      continue;
    }
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
    unsigned FirstLoadAS = FirstLoad->getAddressSpace();
    unsigned FirstLoadAlign = FirstLoad->getAlignment();

    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. These variables hold the index in
    // the store node array.
    unsigned LastConsecutiveLoad = 1;
    // This variable refers to the size and not index in the array.
    unsigned LastLegalVectorType = 1;
    unsigned LastLegalIntegerType = 1;
    bool isDereferenceable = true;
    bool DoIntegerTruncate = false;
    StartAddress = LoadNodes[0].OffsetFromBase;
    SDValue FirstChain = FirstLoad->getChain();
    for (unsigned i = 1; i < LoadNodes.size(); ++i) {
      // All loads must share the same chain.
      if (LoadNodes[i].MemNode->getChain() != FirstChain)
        break;

      int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      LastConsecutiveLoad = i;

      // The merged load is dereferenceable only if every source load is.
      if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
        isDereferenceable = false;

      // Find a legal type for the vector store.
      unsigned Elts = (i + 1) * NumMemElts;
      EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

      // Both the wide store and the wide load must be fast accesses.
      bool IsFastSt, IsFastLd;
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                 FirstStoreAlign, &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                 FirstLoadAlign, &IsFastLd) &&
          IsFastLd) {
        LastLegalVectorType = i + 1;
      }

      // Find a legal type for the integer store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                 FirstStoreAlign, &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                 FirstLoadAlign, &IsFastLd) &&
          IsFastLd) {
        LastLegalIntegerType = i + 1;
        DoIntegerTruncate = false;
        // Or check whether a truncstore and extload is legal.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
            TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
                               StoreTy) &&
            TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
                               StoreTy) &&
            TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                   FirstLoadAlign, &IsFastLd) &&
            IsFastLd) {
          LastLegalIntegerType = i + 1;
          DoIntegerTruncate = true;
        }
      }
    }

    // Only use vector types if the vector type is larger than the integer type.
    // If they are the same, use integers.
    bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
    unsigned LastLegalType =
        std::max(LastLegalVectorType, LastLegalIntegerType);

    // Add +1 to LastConsecutiveLoad because it is an index while NumElem is a
    // count of elements.
    unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
    NumElem = std::min(LastLegalType, NumElem);

    if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment or either
      // the load or store has improved. Drop as many candidates as we
      // can here.
      unsigned NumSkip = 1;
      while ((NumSkip < LoadNodes.size()) &&
             (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
        NumSkip++;
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      continue;
    }

    // Find if it is better to use vectors or integers to load and store
    // to memory.
    EVT JointMemOpVT;
    if (UseVectorTy) {
      // Find a legal type for the vector store.
      unsigned Elts = NumElem * NumMemElts;
      JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
    } else {
      unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
      JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
    }

    SDLoc LoadDL(LoadNodes[0].MemNode);
    SDLoc StoreDL(StoreNodes[0].MemNode);

    // The merged loads are required to have the same incoming chain, so
    // using the first's chain is acceptable.

    SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
    AddToWorklist(NewStoreChain.getNode());

    // Only mark the merged access dereferenceable if every source load was.
    MachineMemOperand::Flags MMOFlags = isDereferenceable ?
                                          MachineMemOperand::MODereferenceable:
                                          MachineMemOperand::MONone;

    SDValue NewLoad, NewStore;
    if (UseVectorTy || !DoIntegerTruncate) {
      NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
                            FirstLoad->getBasePtr(),
                            FirstLoad->getPointerInfo(), FirstLoadAlign,
                            MMOFlags);
      NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
                              FirstInChain->getBasePtr(),
                              FirstInChain->getPointerInfo(), FirstStoreAlign);
    } else { // This must be the truncstore/extload case
      EVT ExtendedTy =
          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
      NewLoad =
          DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
                         JointMemOpVT, FirstLoadAlign, MMOFlags);
      NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
                                   FirstInChain->getBasePtr(),
                                   FirstInChain->getPointerInfo(), JointMemOpVT,
                                   FirstInChain->getAlignment(),
                                   FirstInChain->getMemOperand()->getFlags());
    }

    // Transfer chain users from old loads to the new load.
    for (unsigned i = 0; i < NumElem; ++i) {
      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                    SDValue(NewLoad.getNode(), 1));
    }

    // Replace all of the merged stores with the new store. Recursively remove
    // each stored value if it is no longer used.
    for (unsigned i = 0; i < NumElem; ++i) {
      SDValue Val = StoreNodes[i].MemNode->getOperand(1);
      CombineTo(StoreNodes[i].MemNode, NewStore);
      if (Val.getNode()->use_empty())
        recursivelyDeleteUnusedNodes(Val.getNode());
    }

    RV = true;
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
  }
  return RV;
}
13619 
13620 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
13621   SDLoc SL(ST);
13622   SDValue ReplStore;
13623 
13624   // Replace the chain to avoid dependency.
13625   if (ST->isTruncatingStore()) {
13626     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
13627                                   ST->getBasePtr(), ST->getMemoryVT(),
13628                                   ST->getMemOperand());
13629   } else {
13630     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
13631                              ST->getMemOperand());
13632   }
13633 
13634   // Create token to keep both nodes around.
13635   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
13636                               MVT::Other, ST->getChain(), ReplStore);
13637 
13638   // Make sure the new and old chains are cleaned up.
13639   AddToWorklist(Token.getNode());
13640 
13641   // Don't add users to work list.
13642   return CombineTo(ST, Token, false);
13643 }
13644 
13645 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
13646   SDValue Value = ST->getValue();
13647   if (Value.getOpcode() == ISD::TargetConstantFP)
13648     return SDValue();
13649 
13650   SDLoc DL(ST);
13651 
13652   SDValue Chain = ST->getChain();
13653   SDValue Ptr = ST->getBasePtr();
13654 
13655   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
13656 
13657   // NOTE: If the original store is volatile, this transform must not increase
13658   // the number of stores.  For example, on x86-32 an f64 can be stored in one
13659   // processor operation but an i64 (which is not legal) requires two.  So the
13660   // transform should not be done in this case.
13661 
13662   SDValue Tmp;
13663   switch (CFP->getSimpleValueType(0).SimpleTy) {
13664   default:
13665     llvm_unreachable("Unknown FP type");
13666   case MVT::f16:    // We don't do this for these yet.
13667   case MVT::f80:
13668   case MVT::f128:
13669   case MVT::ppcf128:
13670     return SDValue();
13671   case MVT::f32:
13672     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
13673         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13674       ;
13675       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
13676                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
13677                             MVT::i32);
13678       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
13679     }
13680 
13681     return SDValue();
13682   case MVT::f64:
13683     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
13684          !ST->isVolatile()) ||
13685         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
13686       ;
13687       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
13688                             getZExtValue(), SDLoc(CFP), MVT::i64);
13689       return DAG.getStore(Chain, DL, Tmp,
13690                           Ptr, ST->getMemOperand());
13691     }
13692 
13693     if (!ST->isVolatile() &&
13694         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
13695       // Many FP stores are not made apparent until after legalize, e.g. for
13696       // argument passing.  Since this is so common, custom legalize the
13697       // 64-bit integer store into two 32-bit stores.
13698       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
13699       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
13700       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
13701       if (DAG.getDataLayout().isBigEndian())
13702         std::swap(Lo, Hi);
13703 
13704       unsigned Alignment = ST->getAlignment();
13705       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13706       AAMDNodes AAInfo = ST->getAAInfo();
13707 
13708       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13709                                  ST->getAlignment(), MMOFlags, AAInfo);
13710       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13711                         DAG.getConstant(4, DL, Ptr.getValueType()));
13712       Alignment = MinAlign(Alignment, 4U);
13713       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
13714                                  ST->getPointerInfo().getWithOffset(4),
13715                                  Alignment, MMOFlags, AAInfo);
13716       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13717                          St0, St1);
13718     }
13719 
13720     return SDValue();
13721   }
13722 }
13723 
13724 SDValue DAGCombiner::visitSTORE(SDNode *N) {
13725   StoreSDNode *ST  = cast<StoreSDNode>(N);
13726   SDValue Chain = ST->getChain();
13727   SDValue Value = ST->getValue();
13728   SDValue Ptr   = ST->getBasePtr();
13729 
13730   // If this is a store of a bit convert, store the input value if the
13731   // resultant store does not need a higher alignment than the original.
13732   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
13733       ST->isUnindexed()) {
13734     EVT SVT = Value.getOperand(0).getValueType();
13735     if (((!LegalOperations && !ST->isVolatile()) ||
13736          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
13737         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
13738       unsigned OrigAlign = ST->getAlignment();
13739       bool Fast = false;
13740       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
13741                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
13742           Fast) {
13743         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
13744                             ST->getPointerInfo(), OrigAlign,
13745                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
13746       }
13747     }
13748   }
13749 
13750   // Turn 'store undef, Ptr' -> nothing.
13751   if (Value.isUndef() && ST->isUnindexed())
13752     return Chain;
13753 
13754   // Try to infer better alignment information than the store already has.
13755   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
13756     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13757       if (Align > ST->getAlignment()) {
13758         SDValue NewStore =
13759             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
13760                               ST->getMemoryVT(), Align,
13761                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
13762         if (NewStore.getNode() != N)
13763           return CombineTo(ST, NewStore, true);
13764       }
13765     }
13766   }
13767 
13768   // Try transforming a pair floating point load / store ops to integer
13769   // load / store ops.
13770   if (SDValue NewST = TransformFPLoadStorePair(N))
13771     return NewST;
13772 
13773   if (ST->isUnindexed()) {
13774     // Walk up chain skipping non-aliasing memory nodes, on this store and any
13775     // adjacent stores.
13776     if (findBetterNeighborChains(ST)) {
13777       // replaceStoreChain uses CombineTo, which handled all of the worklist
13778       // manipulation. Return the original node to not do anything else.
13779       return SDValue(ST, 0);
13780     }
13781     Chain = ST->getChain();
13782   }
13783 
13784   // FIXME: is there such a thing as a truncating indexed store?
13785   if (ST->isTruncatingStore() && ST->isUnindexed() &&
13786       Value.getValueType().isInteger()) {
13787     // See if we can simplify the input to this truncstore with knowledge that
13788     // only the low bits are being used.  For example:
13789     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
13790     SDValue Shorter = DAG.GetDemandedBits(
13791         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13792                                     ST->getMemoryVT().getScalarSizeInBits()));
13793     AddToWorklist(Value.getNode());
13794     if (Shorter.getNode())
13795       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
13796                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
13797 
13798     // Otherwise, see if we can simplify the operation with
13799     // SimplifyDemandedBits, which only works if the value has a single use.
13800     if (SimplifyDemandedBits(
13801             Value,
13802             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13803                                  ST->getMemoryVT().getScalarSizeInBits()))) {
13804       // Re-visit the store if anything changed and the store hasn't been merged
13805       // with another node (N is deleted) SimplifyDemandedBits will add Value's
13806       // node back to the worklist if necessary, but we also need to re-visit
13807       // the Store node itself.
13808       if (N->getOpcode() != ISD::DELETED_NODE)
13809         AddToWorklist(N);
13810       return SDValue(N, 0);
13811     }
13812   }
13813 
13814   // If this is a load followed by a store to the same location, then the store
13815   // is dead/noop.
13816   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
13817     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
13818         ST->isUnindexed() && !ST->isVolatile() &&
13819         // There can't be any side effects between the load and store, such as
13820         // a call or store.
13821         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
13822       // The store is dead, remove it.
13823       return Chain;
13824     }
13825   }
13826 
13827   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
13828     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
13829         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
13830         ST->getMemoryVT() == ST1->getMemoryVT()) {
13831       // If this is a store followed by a store with the same value to the same
13832       // location, then the store is dead/noop.
13833       if (ST1->getValue() == Value) {
13834         // The store is dead, remove it.
13835         return Chain;
13836       }
13837 
      // If the preceding store ST1 writes to the same location and no other
      // node is chained to it, then ST fully overwrites it and ST1 can
      // effectively be dropped. Do not remove stores to undef as they may be
      // used as data sinks.
13842       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
13843           !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
13846         CombineTo(ST1, ST1->getChain());
13847         return SDValue();
13848       }
13849     }
13850   }
13851 
13852   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
13853   // truncating store.  We can do this even if this is already a truncstore.
13854   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
13855       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
13856       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
13857                             ST->getMemoryVT())) {
13858     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
13859                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
13860   }
13861 
13862   // Always perform this optimization before types are legal. If the target
13863   // prefers, also try this after legalization to catch stores that were created
13864   // by intrinsics or other nodes.
13865   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
13866     while (true) {
13867       // There can be multiple store sequences on the same chain.
13868       // Keep trying to merge store sequences until we are unable to do so
13869       // or until we merge the last store on the chain.
13870       bool Changed = MergeConsecutiveStores(ST);
13871       if (!Changed) break;
13872       // Return N as merge only uses CombineTo and no worklist clean
13873       // up is necessary.
13874       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
13875         return SDValue(N, 0);
13876     }
13877   }
13878 
13879   // Try transforming N to an indexed store.
13880   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13881     return SDValue(N, 0);
13882 
13883   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
13884   //
13885   // Make sure to do this only after attempting to merge stores in order to
13886   //  avoid changing the types of some subset of stores due to visit order,
13887   //  preventing their merging.
13888   if (isa<ConstantFPSDNode>(ST->getValue())) {
13889     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
13890       return NewSt;
13891   }
13892 
13893   if (SDValue NewSt = splitMergedValStore(ST))
13894     return NewSt;
13895 
13896   return ReduceLoadOpStoreWidth(N);
13897 }
13898 
13899 /// For the instruction sequence of store below, F and I values
13900 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
13902 /// which can remove the bitwise instructions or sink them to colder places.
13903 ///
13904 ///   (store (or (zext (bitcast F to i32) to i64),
13905 ///              (shl (zext I to i64), 32)), addr)  -->
13906 ///   (store F, addr) and (store I, addr+4)
13907 ///
13908 /// Similarly, splitting for other merged store can also be beneficial, like:
13909 /// For pair of {i32, i32}, i64 store --> two i32 stores.
13910 /// For pair of {i32, i16}, i64 store --> two i32 stores.
13911 /// For pair of {i16, i16}, i32 store --> two i16 stores.
13912 /// For pair of {i16, i8},  i32 store --> two i16 stores.
13913 /// For pair of {i8, i8},   i16 store --> two i8 stores.
13914 ///
13915 /// We allow each target to determine specifically which kind of splitting is
13916 /// supported.
13917 ///
13918 /// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before being inlined into hoo.
13920 ///   void goo(const std::pair<int, float> &);
13921 ///   hoo() {
13922 ///     ...
13923 ///     goo(std::make_pair(tmp, ftmp));
13924 ///     ...
13925 ///   }
13926 ///
13927 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
13928   if (OptLevel == CodeGenOpt::None)
13929     return SDValue();
13930 
13931   SDValue Val = ST->getValue();
13932   SDLoc DL(ST);
13933 
13934   // Match OR operand.
13935   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
13936     return SDValue();
13937 
13938   // Match SHL operand and get Lower and Higher parts of Val.
13939   SDValue Op1 = Val.getOperand(0);
13940   SDValue Op2 = Val.getOperand(1);
13941   SDValue Lo, Hi;
13942   if (Op1.getOpcode() != ISD::SHL) {
13943     std::swap(Op1, Op2);
13944     if (Op1.getOpcode() != ISD::SHL)
13945       return SDValue();
13946   }
13947   Lo = Op2;
13948   Hi = Op1.getOperand(0);
13949   if (!Op1.hasOneUse())
13950     return SDValue();
13951 
13952   // Match shift amount to HalfValBitSize.
13953   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
13954   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
13955   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
13956     return SDValue();
13957 
13958   // Lo and Hi are zero-extended from int with size less equal than 32
13959   // to i64.
13960   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
13961       !Lo.getOperand(0).getValueType().isScalarInteger() ||
13962       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
13963       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
13964       !Hi.getOperand(0).getValueType().isScalarInteger() ||
13965       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
13966     return SDValue();
13967 
13968   // Use the EVT of low and high parts before bitcast as the input
13969   // of target query.
13970   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
13971                   ? Lo.getOperand(0).getValueType()
13972                   : Lo.getValueType();
13973   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
13974                    ? Hi.getOperand(0).getValueType()
13975                    : Hi.getValueType();
13976   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
13977     return SDValue();
13978 
13979   // Start to split store.
13980   unsigned Alignment = ST->getAlignment();
13981   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
13982   AAMDNodes AAInfo = ST->getAAInfo();
13983 
13984   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
13985   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
13986   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
13987   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
13988 
13989   SDValue Chain = ST->getChain();
13990   SDValue Ptr = ST->getBasePtr();
13991   // Lower value store.
13992   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
13993                              ST->getAlignment(), MMOFlags, AAInfo);
13994   Ptr =
13995       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
13996                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
13997   // Higher value store.
13998   SDValue St1 =
13999       DAG.getStore(St0, DL, Hi, Ptr,
14000                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
14001                    Alignment / 2, MMOFlags, AAInfo);
14002   return St1;
14003 }
14004 
14005 /// Convert a disguised subvector insertion into a shuffle:
14006 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
14007 /// bitcast(shuffle (bitcast V), (extended X), Mask)
14008 /// Note: We do not use an insert_subvector node because that requires a legal
14009 /// subvector type.
14010 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
14011   SDValue InsertVal = N->getOperand(1);
14012   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
14013       !InsertVal.getOperand(0).getValueType().isVector())
14014     return SDValue();
14015 
14016   SDValue SubVec = InsertVal.getOperand(0);
14017   SDValue DestVec = N->getOperand(0);
14018   EVT SubVecVT = SubVec.getValueType();
14019   EVT VT = DestVec.getValueType();
14020   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
14021   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
14022   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
14023 
14024   // Step 1: Create a shuffle mask that implements this insert operation. The
14025   // vector that we are inserting into will be operand 0 of the shuffle, so
14026   // those elements are just 'i'. The inserted subvector is in the first
14027   // positions of operand 1 of the shuffle. Example:
14028   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
14029   SmallVector<int, 16> Mask(NumMaskVals);
14030   for (unsigned i = 0; i != NumMaskVals; ++i) {
14031     if (i / NumSrcElts == InsIndex)
14032       Mask[i] = (i % NumSrcElts) + NumMaskVals;
14033     else
14034       Mask[i] = i;
14035   }
14036 
14037   // Bail out if the target can not handle the shuffle we want to create.
14038   EVT SubVecEltVT = SubVecVT.getVectorElementType();
14039   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
14040   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
14041     return SDValue();
14042 
14043   // Step 2: Create a wide vector from the inserted source vector by appending
14044   // undefined elements. This is the same size as our destination vector.
14045   SDLoc DL(N);
14046   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
14047   ConcatOps[0] = SubVec;
14048   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
14049 
14050   // Step 3: Shuffle in the padded subvector.
14051   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
14052   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
14053   AddToWorklist(PaddedSubV.getNode());
14054   AddToWorklist(DestVecBC.getNode());
14055   AddToWorklist(Shuf.getNode());
14056   return DAG.getBitcast(VT, Shuf);
14057 }
14058 
/// Visit an INSERT_VECTOR_ELT node: drop undef/redundant inserts, turn a
/// bitcast-of-vector insertion into a shuffle, canonicalize chains of
/// constant-index inserts, and fold the insert into a BUILD_VECTOR operand.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);  // Vector being inserted into.
  SDValue InVal = N->getOperand(1);  // Scalar value to insert.
  SDValue EltNo = N->getOperand(2);  // Insertion index (may be non-constant).
  SDLoc DL(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.isUndef())
    return InVec;

  EVT VT = InVec.getValueType();

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  // We must know which element is being inserted for folds below here.
  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  // If the inserted value is a bitcast from a vector type, this may really be
  // a subvector insertion that is better expressed as a shuffle.
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    // An UNDEF input vector acts like a BUILD_VECTOR of all-undef elements.
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}
14137 
/// Replace (extract_vector_elt (load VecPtr), EltNo) with a narrow scalar
/// load from VecPtr + EltNo * EltSize. The original load must be
/// non-volatile; both the value and the chain results of the original load
/// are rewritten, so the extract should be the load's only value user.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  // The narrow element load will use the element type's ABI alignment; bail
  // out if the original load cannot guarantee that much alignment, or if the
  // target cannot perform a load of the element type at all.
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  // Let the target veto the narrowing (it may be better to keep the wide
  // vector load).
  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the extracted element: base + EltNo * EltSize.
  // A constant index also lets us keep precise pointer info for alias
  // analysis; a variable index falls back to the base pointer info.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Otherwise load the element directly, then truncate or bitcast to the
    // requested result type.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Rewrite both the extracted value and the original load's chain in one
  // shot so no user is left pointing at the wide load.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
14220 
/// Visit an EXTRACT_VECTOR_ELT node: fold extracts of scalar_to_vector,
/// build_vector, bitcast, insert_vector_elt and vector_shuffle inputs, and
/// narrow an extract-of-load into a scalar load where profitable.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  // Extracting from an undef vector yields undef.
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt of out-of-bounds element -> UNDEF
  if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
    return DAG.getUNDEF(NVT);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
  // Only the element holding the low bits of the scalar source (element 0 on
  // little-endian, the last element on big-endian) is a plain truncate.
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // From here on we try to replace the extract with a narrowed scalar load.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  // Look through a bitcast of the input vector, tracking whether the element
  // count changed (which makes shuffle masks meaningless below).
  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // The hasPredecessor check guards against a cycle: the variable index must
  // not depend on the load we are about to narrow.
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rewrite Elt as an index into the selected shuffle source vector.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
14440 
14441 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
14442 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
14443   // We perform this optimization post type-legalization because
14444   // the type-legalizer often scalarizes integer-promoted vectors.
14445   // Performing this optimization before may create bit-casts which
14446   // will be type-legalized to complex code sequences.
14447   // We perform this optimization only before the operation legalizer because we
14448   // may introduce illegal operations.
14449   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
14450     return SDValue();
14451 
14452   unsigned NumInScalars = N->getNumOperands();
14453   SDLoc DL(N);
14454   EVT VT = N->getValueType(0);
14455 
14456   // Check to see if this is a BUILD_VECTOR of a bunch of values
14457   // which come from any_extend or zero_extend nodes. If so, we can create
14458   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
14459   // optimizations. We do not handle sign-extend because we can't fill the sign
14460   // using shuffles.
14461   EVT SourceType = MVT::Other;
14462   bool AllAnyExt = true;
14463 
14464   for (unsigned i = 0; i != NumInScalars; ++i) {
14465     SDValue In = N->getOperand(i);
14466     // Ignore undef inputs.
14467     if (In.isUndef()) continue;
14468 
14469     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
14470     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
14471 
14472     // Abort if the element is not an extension.
14473     if (!ZeroExt && !AnyExt) {
14474       SourceType = MVT::Other;
14475       break;
14476     }
14477 
14478     // The input is a ZeroExt or AnyExt. Check the original type.
14479     EVT InTy = In.getOperand(0).getValueType();
14480 
14481     // Check that all of the widened source types are the same.
14482     if (SourceType == MVT::Other)
14483       // First time.
14484       SourceType = InTy;
14485     else if (InTy != SourceType) {
14486       // Multiple income types. Abort.
14487       SourceType = MVT::Other;
14488       break;
14489     }
14490 
14491     // Check if all of the extends are ANY_EXTENDs.
14492     AllAnyExt &= AnyExt;
14493   }
14494 
14495   // In order to have valid types, all of the inputs must be extended from the
14496   // same source type and all of the inputs must be any or zero extend.
14497   // Scalar sizes must be a power of two.
14498   EVT OutScalarTy = VT.getScalarType();
14499   bool ValidTypes = SourceType != MVT::Other &&
14500                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
14501                  isPowerOf2_32(SourceType.getSizeInBits());
14502 
14503   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
14504   // turn into a single shuffle instruction.
14505   if (!ValidTypes)
14506     return SDValue();
14507 
14508   bool isLE = DAG.getDataLayout().isLittleEndian();
14509   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
14510   assert(ElemRatio > 1 && "Invalid element size ratio");
14511   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
14512                                DAG.getConstant(0, DL, SourceType);
14513 
14514   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
14515   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
14516 
14517   // Populate the new build_vector
14518   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
14519     SDValue Cast = N->getOperand(i);
14520     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
14521             Cast.getOpcode() == ISD::ZERO_EXTEND ||
14522             Cast.isUndef()) && "Invalid cast opcode");
14523     SDValue In;
14524     if (Cast.isUndef())
14525       In = DAG.getUNDEF(SourceType);
14526     else
14527       In = Cast->getOperand(0);
14528     unsigned Index = isLE ? (i * ElemRatio) :
14529                             (i * ElemRatio + (ElemRatio - 1));
14530 
14531     assert(Index < Ops.size() && "Invalid index");
14532     Ops[Index] = In;
14533   }
14534 
14535   // The type of the new BUILD_VECTOR node.
14536   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
14537   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
14538          "Invalid vector size");
14539   // Check if the new vector type is legal.
14540   if (!isTypeLegal(VecVT)) return SDValue();
14541 
14542   // Make the new BUILD_VECTOR.
14543   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
14544 
14545   // The new BUILD_VECTOR node has the potential to be further optimized.
14546   AddToWorklist(BV.getNode());
14547   // Bitcast to the desired type.
14548   return DAG.getBitcast(VT, BV);
14549 }
14550 
14551 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
14552   EVT VT = N->getValueType(0);
14553 
14554   unsigned NumInScalars = N->getNumOperands();
14555   SDLoc DL(N);
14556 
14557   EVT SrcVT = MVT::Other;
14558   unsigned Opcode = ISD::DELETED_NODE;
14559   unsigned NumDefs = 0;
14560 
14561   for (unsigned i = 0; i != NumInScalars; ++i) {
14562     SDValue In = N->getOperand(i);
14563     unsigned Opc = In.getOpcode();
14564 
14565     if (Opc == ISD::UNDEF)
14566       continue;
14567 
14568     // If all scalar values are floats and converted from integers.
14569     if (Opcode == ISD::DELETED_NODE &&
14570         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
14571       Opcode = Opc;
14572     }
14573 
14574     if (Opc != Opcode)
14575       return SDValue();
14576 
14577     EVT InVT = In.getOperand(0).getValueType();
14578 
14579     // If all scalar values are typed differently, bail out. It's chosen to
14580     // simplify BUILD_VECTOR of integer types.
14581     if (SrcVT == MVT::Other)
14582       SrcVT = InVT;
14583     if (SrcVT != InVT)
14584       return SDValue();
14585     NumDefs++;
14586   }
14587 
14588   // If the vector has just one element defined, it's not worth to fold it into
14589   // a vectorized one.
14590   if (NumDefs < 2)
14591     return SDValue();
14592 
14593   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
14594          && "Should only handle conversion from integer to float.");
14595   assert(SrcVT != MVT::Other && "Cannot determine source type!");
14596 
14597   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
14598 
14599   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
14600     return SDValue();
14601 
14602   // Just because the floating-point vector type is legal does not necessarily
14603   // mean that the corresponding integer vector type is.
14604   if (!isTypeLegal(NVT))
14605     return SDValue();
14606 
14607   SmallVector<SDValue, 8> Opnds;
14608   for (unsigned i = 0; i != NumInScalars; ++i) {
14609     SDValue In = N->getOperand(i);
14610 
14611     if (In.isUndef())
14612       Opnds.push_back(DAG.getUNDEF(SrcVT));
14613     else
14614       Opnds.push_back(In.getOperand(0));
14615   }
14616   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
14617   AddToWorklist(BV.getNode());
14618 
14619   return DAG.getNode(Opcode, DL, VT, BV);
14620 }
14621 
/// Helper for reduceBuildVecToShuffle. Given the two source vectors chosen
/// for shuffle pair number \p LeftIdx (\p VecIn1 and the possibly-null
/// \p VecIn2), adapt their types to the BUILD_VECTOR result type of \p N and
/// build a vector_shuffle that places each extracted element (per
/// \p VectorMask and the constant extract indices of N's operands) into its
/// output lane. Returns an empty SDValue when the type mismatch cannot be
/// handled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // A missing second input is treated as having the same type as the first.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the wider width; the result is extracted back to VT at
        // the bottom of this function.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn1 already matches the output width; widen the half-width VecIn2
      // to VT by concatenating it with undef.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Skip lanes that are undef (-1) or taken from the zero vector (0).
    if (VectorMask[i] <= 0)
      continue;

    // Operand i is an EXTRACT_VECTOR_ELT with a constant index (the caller,
    // reduceBuildVecToShuffle, has already checked this); map that index
    // into the shuffle mask, offsetting lanes that come from VecIn2.
    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
14742 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is reserved for the zero vector, so real inputs start at index 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    // Record the extract index of each lane and the largest one seen.
    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    // Only split when the input is much wider than the output (the accessed
    // range, rounded up to a power of two, exceeds twice NumElems) and the
    // half-sized vector type is legal.
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        // Replace the single wide input (VecIn[1]) with its two halves.
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        // Re-point each defined lane at the half its index falls into.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // LeftIdx starts at 1 because VecIn[0] is the reserved zero-vector slot.
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      // The zero vector (if used) was appended last, so it is the final
      // shuffle in the list.
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      // Pad an odd-sized level with undef. Since CurSize is at most half of
      // the list's original (even) size here, this overwrites a stale entry
      // rather than growing the vector.
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      // Blend: take the lanes owned by Left from the left shuffle and the
      // lanes owned by Right from the right shuffle; renumber both to In for
      // the next level of the tree.
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
14951 
/// Combine a BUILD_VECTOR node: fold all-undef to undef, a splat of a vector
/// bitcast to a concat_vectors, a run of in-order extracts to the source
/// vector (or an EXTRACT_SUBVECTOR of it), and finally try the
/// reduceBuildVec* combines below.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcast(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
        SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
        return DAG.getBitcast(VT, Concat);
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extract index when Op is an EXTRACT_VECTOR_ELT
    // from the same source vector as Op0; otherwise (uint64_t)-1.
    // NOTE(review): 'Offset' below is an int while this returns uint64_t;
    // the comparison relies on integer promotion, and a failed (-1) lookup
    // does not match any realistic offset.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // Require the extract indices to be Offset, Offset+1, ..., i.e. a
    // contiguous, in-order slice of the source vector.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Identity: the slice starts at 0 and covers a vector of the same type.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
15022 
15023 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
15024   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15025   EVT OpVT = N->getOperand(0).getValueType();
15026 
15027   // If the operands are legal vectors, leave them alone.
15028   if (TLI.isTypeLegal(OpVT))
15029     return SDValue();
15030 
15031   SDLoc DL(N);
15032   EVT VT = N->getValueType(0);
15033   SmallVector<SDValue, 8> Ops;
15034 
15035   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
15036   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
15037 
15038   // Keep track of what we encounter.
15039   bool AnyInteger = false;
15040   bool AnyFP = false;
15041   for (const SDValue &Op : N->ops()) {
15042     if (ISD::BITCAST == Op.getOpcode() &&
15043         !Op.getOperand(0).getValueType().isVector())
15044       Ops.push_back(Op.getOperand(0));
15045     else if (ISD::UNDEF == Op.getOpcode())
15046       Ops.push_back(ScalarUndef);
15047     else
15048       return SDValue();
15049 
15050     // Note whether we encounter an integer or floating point scalar.
15051     // If it's neither, bail out, it could be something weird like x86mmx.
15052     EVT LastOpVT = Ops.back().getValueType();
15053     if (LastOpVT.isFloatingPoint())
15054       AnyFP = true;
15055     else if (LastOpVT.isInteger())
15056       AnyInteger = true;
15057     else
15058       return SDValue();
15059   }
15060 
15061   // If any of the operands is a floating point scalar bitcast to a vector,
15062   // use floating point types throughout, and bitcast everything.
15063   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
15064   if (AnyFP) {
15065     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
15066     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
15067     if (AnyInteger) {
15068       for (SDValue &Op : Ops) {
15069         if (Op.getValueType() == SVT)
15070           continue;
15071         if (Op.isUndef())
15072           Op = ScalarUndef;
15073         else
15074           Op = DAG.getBitcast(SVT, Op);
15075       }
15076     }
15077   }
15078 
15079   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
15080                                VT.getSizeInBits() / SVT.getSizeInBits());
15081   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
15082 }
15083 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most two) distinct source vectors of the shuffle.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    Op = peekThroughBitcast(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    ExtVec = peekThroughBitcast(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The extract index must be constant.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue(); // Element counts don't divide either way.

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      // A third distinct source vector - give up.
      return SDValue();
    }
  }

  // Only create the shuffle if the target considers the mask legal.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
15164 
/// Combine a CONCAT_VECTORS node: drop single-operand concats, fold all-undef
/// to undef, recognize concat-of-a-bitcast-scalar as scalar_to_vector, merge
/// BUILD_VECTOR/UNDEF operands into one BUILD_VECTOR, try the scalar- and
/// extract-based concat folds, and finally detect an identity concatenation
/// of extract_subvectors from a single source vector.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0).getValueType().isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // Only integer and floating-point scalars can become vector elements.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      // Both the new vector type and the scalar type must be legal for the
      // SCALAR_TO_VECTOR we are about to create.
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand contributes NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer operands may be wider than MinVT; truncate them down.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  // Every operand was an undef or an identity extract from SingleSource.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
15322 
/// If we are extracting a subvector produced by a wide binary operator with
/// at least one operand that was the result of a vector concatenation, then try
15325 /// to use the narrow vector operands directly to avoid the concatenation and
15326 /// extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndex)
    return SDValue();

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  EVT VT = Extract->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
         "Extract index is not a multiple of the vector length.");
  if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  unsigned BOpcode = BinOp.getOpcode();
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // The binop must be a vector type, so we can chop it in half.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideBVT.getVectorNumElements() / 2);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // Peek through bitcasts of the binary operator operands if needed.
  SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
  SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
  bool ConcatL =
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
  bool ConcatR =
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
  if (!ConcatL && !ConcatR)
    return SDValue();

  // If one of the binop operands was not the result of a concat, we must
  // extract a half-sized operand for our new narrow binop. We can't just reuse
  // the original extract index operand because we may have bitcasted.
  // ConcatOpNum selects the low (0) or high (1) half of the wide value;
  // ExtBOIdx is the equivalent starting element expressed in NarrowBVT units.
  unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  SDLoc DL(Extract);

  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
  // A concat operand may have been bitcasted, so bitcast it to the narrow
  // type; otherwise extract the matching half directly from the (unpeeked)
  // binop operand so the wide value's element units stay consistent.
  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(0),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(1),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  // Bitcast back in case the narrow binop type differs from the extract's
  // result type.
  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
  return DAG.getBitcast(VT, NarrowBinOp);
}
15408 
15409 /// If we are extracting a subvector from a wide vector load, convert to a
15410 /// narrow load to eliminate the extraction:
15411 /// (extract_subvector (load wide vector)) --> (load narrow vector)
15412 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
15413   // TODO: Add support for big-endian. The offset calculation must be adjusted.
15414   if (DAG.getDataLayout().isBigEndian())
15415     return SDValue();
15416 
15417   // TODO: The one-use check is overly conservative. Check the cost of the
15418   // extract instead or remove that condition entirely.
15419   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
15420   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
15421   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
15422       !ExtIdx)
15423     return SDValue();
15424 
15425   // The narrow load will be offset from the base address of the old load if
15426   // we are extracting from something besides index 0 (little-endian).
15427   EVT VT = Extract->getValueType(0);
15428   SDLoc DL(Extract);
15429   SDValue BaseAddr = Ld->getOperand(1);
15430   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
15431 
15432   // TODO: Use "BaseIndexOffset" to make this more effective.
15433   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
15434   MachineFunction &MF = DAG.getMachineFunction();
15435   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
15436                                                    VT.getStoreSize());
15437   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
15438   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
15439   return NewLd;
15440 }
15441 
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  // (extract_subvector (load wide vector)) --> (load narrow vector),
  // but only if the target can select a load of the narrow type.
  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V->getOpcode() == ISD::CONCAT_VECTORS &&
      isa<ConstantSDNode>(N->getOperand(1)) &&
      V->getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  // Skip bitcasting
  V = peekThroughBitcast(V);

  // If the input is a build vector. Try to make a smaller build vector.
  if (V->getOpcode() == ISD::BUILD_VECTOR) {
    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      EVT InVT = V->getValueType(0);
      unsigned ExtractSize = NVT.getSizeInBits();
      unsigned EltSize = InVT.getScalarSizeInBits();
      // Only do this if we won't split any elements.
      if (ExtractSize % EltSize == 0) {
        unsigned NumElems = ExtractSize / EltSize;
        EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
                                         InVT.getVectorElementType(), NumElems);
        if ((!LegalOperations ||
             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
          // Convert the extract index from NVT element units to InVT element
          // units (the two types may differ because of the bitcast peek).
          unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
                            EltSize;

          // Extract the pieces from the original build_vector.
          SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
                                            makeArrayRef(V->op_begin() + IdxVal,
                                                         NumElems));
          // Bitcast back in case the build_vector element type differs from
          // the extract's result type.
          return DAG.getBitcast(NVT, BuildVec);
        }
      }
    }
  }

  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same size.
    EVT SmallVT = V->getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT))
      return SDValue();

    // Only handle cases where both indexes are constants.
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V1
      //    otherwise => (extract_subvec V1, ExtIdx)
      // Indices are compared as bit offsets because V may have been bitcasted,
      // so the two indices can be in different element units.
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
        return DAG.getBitcast(NVT, V->getOperand(1));
      // Extract from the base vector of the insert; bitcast it back to the
      // pre-peek type so the original extract index stays valid.
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
          N->getOperand(1));
    }
  }

  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;

  return SDValue();
}
15532 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  // All concat inputs have the same type, so operand 0's type is used.
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    // Shuffle A and B with the low half of the mask, then pad with undef.
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    // Classify this mask chunk: entirely undef, entirely defined, or mixed.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The chunk must start at a concat-operand boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and be a run of consecutive mask indices, i.e. an exact copy of
      // one concat operand.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Map the run back to the concat operand it copies (from N0 or N1).
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
15596 
15597 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
15598 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
15599 //
15600 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
15601 // a simplification in some sense, but it isn't appropriate in general: some
15602 // BUILD_VECTORs are substantially cheaper than others. The general case
15603 // of a BUILD_VECTOR requires inserting each element individually (or
15604 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
15605 // all constants is a single constant pool load.  A BUILD_VECTOR where each
15606 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
15607 // are undef lowers to a small number of element insertions.
15608 //
15609 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
15610 // We don't fold shuffles where one side is a non-zero constant, and we don't
15611 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
15612 // non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Only fold when this shuffle is the sole user of both inputs.
  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Gather one scalar operand per mask element, taken from whichever input
  // the mask index selects; undef mask elements become undef scalars.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Mask indices >= NumElts select from the second input.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // Only element 0 of a SCALAR_TO_VECTOR holds a defined scalar.
        assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
        Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      // Prefer zext when the target says it is free; otherwise sext.
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
15686 
15687 // Match shuffles that can be converted to any_vector_extend_in_reg.
15688 // This is often generated during legalization.
15689 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
15690 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
15691 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
15692                                             SelectionDAG &DAG,
15693                                             const TargetLowering &TLI,
15694                                             bool LegalOperations,
15695                                             bool LegalTypes) {
15696   EVT VT = SVN->getValueType(0);
15697   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15698 
15699   // TODO Add support for big-endian when we have a test case.
15700   if (!VT.isInteger() || IsBigEndian)
15701     return SDValue();
15702 
15703   unsigned NumElts = VT.getVectorNumElements();
15704   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15705   ArrayRef<int> Mask = SVN->getMask();
15706   SDValue N0 = SVN->getOperand(0);
15707 
15708   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
15709   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
15710     for (unsigned i = 0; i != NumElts; ++i) {
15711       if (Mask[i] < 0)
15712         continue;
15713       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
15714         continue;
15715       return false;
15716     }
15717     return true;
15718   };
15719 
15720   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
15721   // power-of-2 extensions as they are the most likely.
15722   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
15723     // Check for non power of 2 vector sizes
15724     if (NumElts % Scale != 0)
15725       continue;
15726     if (!isAnyExtend(Scale))
15727       continue;
15728 
15729     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
15730     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
15731     if (!LegalTypes || TLI.isTypeLegal(OutVT))
15732       if (!LegalOperations ||
15733           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
15734         return DAG.getBitcast(VT,
15735                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
15736   }
15737 
15738   return SDValue();
15739 }
15740 
15741 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
15742 // each source element of a large type into the lowest elements of a smaller
15743 // destination type. This is often generated during legalization.
15744 // If the source node itself was a '*_extend_vector_inreg' node then we should
15745 // then be able to remove it.
15746 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
15747                                         SelectionDAG &DAG) {
15748   EVT VT = SVN->getValueType(0);
15749   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
15750 
15751   // TODO Add support for big-endian when we have a test case.
15752   if (!VT.isInteger() || IsBigEndian)
15753     return SDValue();
15754 
15755   SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
15756 
15757   unsigned Opcode = N0.getOpcode();
15758   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
15759       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
15760       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
15761     return SDValue();
15762 
15763   SDValue N00 = N0.getOperand(0);
15764   ArrayRef<int> Mask = SVN->getMask();
15765   unsigned NumElts = VT.getVectorNumElements();
15766   unsigned EltSizeInBits = VT.getScalarSizeInBits();
15767   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
15768   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
15769 
15770   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
15771     return SDValue();
15772   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
15773 
15774   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
15775   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
15776   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
15777   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
15778     for (unsigned i = 0; i != NumElts; ++i) {
15779       if (Mask[i] < 0)
15780         continue;
15781       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
15782         continue;
15783       return false;
15784     }
15785     return true;
15786   };
15787 
15788   // At the moment we just handle the case where we've truncated back to the
15789   // same size as before the extension.
15790   // TODO: handle more extension/truncation cases as cases arise.
15791   if (EltSizeInBits != ExtSrcSizeInBits)
15792     return SDValue();
15793 
15794   // We can remove *extend_vector_inreg only if the truncation happens at
15795   // the same scale as the extension.
15796   if (isTruncate(ExtScale))
15797     return DAG.getBitcast(VT, N00);
15798 
15799   return SDValue();
15800 }
15801 
15802 // Combine shuffles of splat-shuffles of the form:
15803 // shuffle (shuffle V, undef, splat-mask), undef, M
15804 // If splat-mask contains undef elements, we need to be careful about
15805 // introducing undef's in the folded mask which are not the result of composing
15806 // the masks of the shuffles.
15807 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
15808                                      ShuffleVectorSDNode *Splat,
15809                                      SelectionDAG &DAG) {
15810   ArrayRef<int> SplatMask = Splat->getMask();
15811   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
15812 
15813   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
15814   // every undef mask element in the splat-shuffle has a corresponding undef
15815   // element in the user-shuffle's mask or if the composition of mask elements
15816   // would result in undef.
15817   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
15818   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
15819   //   In this case it is not legal to simplify to the splat-shuffle because we
15820   //   may be exposing the users of the shuffle an undef element at index 1
15821   //   which was not there before the combine.
15822   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
15823   //   In this case the composition of masks yields SplatMask, so it's ok to
15824   //   simplify to the splat-shuffle.
15825   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
15826   //   In this case the composed mask includes all undef elements of SplatMask
15827   //   and in addition sets element zero to undef. It is safe to simplify to
15828   //   the splat-shuffle.
15829   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
15830                                        ArrayRef<int> SplatMask) {
15831     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
15832       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
15833           SplatMask[UserMask[i]] != -1)
15834         return false;
15835     return true;
15836   };
15837   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
15838     return SDValue(Splat, 0);
15839 
15840   // Create a new shuffle with a mask that is composed of the two shuffles'
15841   // masks.
15842   SmallVector<int, 32> NewMask;
15843   for (int Idx : UserMask)
15844     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
15845 
15846   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
15847                               Splat->getOperand(0), Splat->getOperand(1),
15848                               NewMask);
15849 }
15850 
15851 /// If the shuffle mask is taking exactly one element from the first vector
15852 /// operand and passing through all other elements from the second vector
15853 /// operand, return the index of the mask element that is choosing an element
15854 /// from the first operand. Otherwise, return -1.
15855 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
15856   int MaskSize = Mask.size();
15857   int EltFromOp0 = -1;
15858   // TODO: This does not match if there are undef elements in the shuffle mask.
15859   // Should we ignore undefs in the shuffle mask instead? The trade-off is
15860   // removing an instruction (a shuffle), but losing the knowledge that some
15861   // vector lanes are not needed.
15862   for (int i = 0; i != MaskSize; ++i) {
15863     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
15864       // We're looking for a shuffle of exactly one element from operand 0.
15865       if (EltFromOp0 != -1)
15866         return -1;
15867       EltFromOp0 = i;
15868     } else if (Mask[i] != i + MaskSize) {
15869       // Nothing from operand 1 can change lanes.
15870       return -1;
15871     }
15872   }
15873   return EltFromOp0;
15874 }
15875 
15876 /// If a shuffle inserts exactly one element from a source vector operand into
15877 /// another vector operand and we can access the specified element as a scalar,
15878 /// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  // NOTE: CommutedMask lives at function scope because 'Mask' may be
  // re-pointed at it below.
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insert must write the exact lane that the shuffle reads from Op0.
  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
                                        Op0.getOperand(2).getValueType());
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}
15928 
// Combine a VECTOR_SHUFFLE node: canonicalize operand order, fold away
// undef/splat inputs, and attempt to merge shuffle chains into a single
// shuffle when the target's mask-legality rules allow it.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    // Remap any mask index that referenced operand 1 onto operand 0.
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        // A lane taken from the undef RHS is itself undef.
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  // shuffle (insertelt v1, x, C), v2 --> insertelt v2, x, C'
  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef element of the build_vector.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      // Check whether every (non-undef) element matches Base.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  // Fold a shuffle of concatenated vectors into a concat of narrower shuffles.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Widen a shuffle mask by 'Scale': each mask entry becomes Scale
    // consecutive entries over the correspondingly narrower elements.
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both scalar sizes must divide evenly into the common scalar size
      // for the masks to be expressible at that granularity.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    // Unreferenced operands of the new shuffle become undef.
    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}
16268 
// Combine a SCALAR_TO_VECTOR node whose scalar input was itself extracted
// from a vector: re-express the whole thing as a shuffle (plus a possible
// scalar truncate or subvector extract) so no scalar round-trip is needed.
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only constant extraction indices can be turned into a shuffle mask.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Mask that moves element Elt of InVec into lane 0; all other lanes
      // are undef.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate (the extract yields a wider integer
      // than the result's scalar type), do the truncate here as long as the
      // narrower scalar type is legal; otherwise this fold is skipped.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      // Otherwise form a shuffle of the source vector, provided the target
      // considers the single-element mask legal.
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
          TLI.isShuffleMaskLegal(NewMask, VT)) {
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
                                   DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size this shuffle is a
        // valid result.
        if (VT == InVecT)
          return Val;
        // If not we must truncate the vector.
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
          EVT SubVT =
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                               VT.getVectorNumElements());
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
                            ZeroIdx);
          return Val;
        }
      }
    }
  }

  return SDValue();
}
16319 
// Combine an INSERT_SUBVECTOR node: operands are (N0 = base vector,
// N1 = subvector to insert, N2 = insertion index). The folds below are
// tried in order; the index-dependent ones require N2 to be constant.
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
  // us to pull BITCASTs from input to output.
  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N1 and N2 are bitcast values on which insert_subvector
  // would makes sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // The pre-bitcast types must agree on element type and the base must
    // keep the same element count for the insert to stay well-formed.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // The remaining folds need a constant insertion index.
  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    // Only reorder when this insert's index is strictly smaller, so the
    // canonical form (ascending insert indices inward) is a fixed point.
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  return SDValue();
}
16420 
16421 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
16422   SDValue N0 = N->getOperand(0);
16423 
16424   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
16425   if (N0->getOpcode() == ISD::FP16_TO_FP)
16426     return N0->getOperand(0);
16427 
16428   return SDValue();
16429 }
16430 
16431 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
16432   SDValue N0 = N->getOperand(0);
16433 
16434   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
16435   if (N0->getOpcode() == ISD::AND) {
16436     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
16437     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
16438       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
16439                          N0.getOperand(0));
16440     }
16441   }
16442 
16443   return SDValue();
16444 }
16445 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcast(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  // The mask operand must be a build_vector of constants.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // 'Split' is how many sub-elements each original element is divided into.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // An undef mask element leaves the shuffle lane undef as well.
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // Sub-element order within an element depends on endianness.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // All-ones keeps the LHS lane (index i); all-zeros takes the lane
      // from the zero vector (index i + NumSubElts). Anything else means
      // this split granularity cannot express the mask.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try coarsest granularity first; fall back to finer splits.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
16536 
16537 /// Visit a binary vector operation, like ADD.
16538 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
16539   assert(N->getValueType(0).isVector() &&
16540          "SimplifyVBinOp only works on vectors!");
16541 
16542   SDValue LHS = N->getOperand(0);
16543   SDValue RHS = N->getOperand(1);
16544   SDValue Ops[] = {LHS, RHS};
16545 
16546   // See if we can constant fold the vector operation.
16547   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
16548           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
16549     return Fold;
16550 
16551   // Type legalization might introduce new shuffles in the DAG.
16552   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
16553   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
16554   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
16555       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
16556       LHS.getOperand(1).isUndef() &&
16557       RHS.getOperand(1).isUndef()) {
16558     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
16559     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
16560 
16561     if (SVN0->getMask().equals(SVN1->getMask())) {
16562       EVT VT = N->getValueType(0);
16563       SDValue UndefVector = LHS.getOperand(1);
16564       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
16565                                      LHS.getOperand(0), RHS.getOperand(0),
16566                                      N->getFlags());
16567       AddUsersToWorklist(N);
16568       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
16569                                   SVN0->getMask());
16570     }
16571   }
16572 
16573   return SDValue();
16574 }
16575 
16576 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
16577                                     SDValue N2) {
16578   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
16579 
16580   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
16581                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
16582 
16583   // If we got a simplified select_cc node back from SimplifySelectCC, then
16584   // break it down into a new SETCC node, and a new SELECT node, and then return
16585   // the SELECT node, since we were called with a SELECT node.
16586   if (SCC.getNode()) {
16587     // Check to see if we got a select_cc back (to turn into setcc/select).
16588     // Otherwise, just return whatever node we got back, like fabs.
16589     if (SCC.getOpcode() == ISD::SELECT_CC) {
16590       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
16591                                   N0.getValueType(),
16592                                   SCC.getOperand(0), SCC.getOperand(1),
16593                                   SCC.getOperand(4));
16594       AddToWorklist(SETCC.getNode());
16595       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
16596                            SCC.getOperand(2), SCC.getOperand(3));
16597     }
16598 
16599     return SCC;
16600   }
16601   return SDValue();
16602 }
16603 
16604 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
16605 /// being selected between, see if we can simplify the select.  Callers of this
16606 /// should assume that TheSelect is deleted if this returns true.  As such, they
16607 /// should return the appropriate thing (e.g. the node) back to the top-level of
16608 /// the DAG combiner loop to avoid it being looked at.
16609 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
16610                                     SDValue RHS) {
16611   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16612   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
16613   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
16614     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
16615       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
16616       SDValue Sqrt = RHS;
16617       ISD::CondCode CC;
16618       SDValue CmpLHS;
16619       const ConstantFPSDNode *Zero = nullptr;
16620 
16621       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
16622         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
16623         CmpLHS = TheSelect->getOperand(0);
16624         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
16625       } else {
16626         // SELECT or VSELECT
16627         SDValue Cmp = TheSelect->getOperand(0);
16628         if (Cmp.getOpcode() == ISD::SETCC) {
16629           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
16630           CmpLHS = Cmp.getOperand(0);
16631           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
16632         }
16633       }
16634       if (Zero && Zero->isZero() &&
16635           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
16636           CC == ISD::SETULT || CC == ISD::SETLT)) {
16637         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
16638         CombineTo(TheSelect, Sqrt);
16639         return true;
16640       }
16641     }
16642   }
16643   // Cannot simplify select with vector condition
16644   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
16645 
16646   // If this is a select from two identical things, try to pull the operation
16647   // through the select.
16648   if (LHS.getOpcode() != RHS.getOpcode() ||
16649       !LHS.hasOneUse() || !RHS.hasOneUse())
16650     return false;
16651 
16652   // If this is a load and the token chain is identical, replace the select
16653   // of two loads with a load through a select of the address to load from.
16654   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
16655   // constants have been dropped into the constant pool.
16656   if (LHS.getOpcode() == ISD::LOAD) {
16657     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
16658     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
16659 
16660     // Token chains must be identical.
16661     if (LHS.getOperand(0) != RHS.getOperand(0) ||
16662         // Do not let this transformation reduce the number of volatile loads.
16663         LLD->isVolatile() || RLD->isVolatile() ||
16664         // FIXME: If either is a pre/post inc/dec load,
16665         // we'd need to split out the address adjustment.
16666         LLD->isIndexed() || RLD->isIndexed() ||
16667         // If this is an EXTLOAD, the VT's must match.
16668         LLD->getMemoryVT() != RLD->getMemoryVT() ||
16669         // If this is an EXTLOAD, the kind of extension must match.
16670         (LLD->getExtensionType() != RLD->getExtensionType() &&
16671          // The only exception is if one of the extensions is anyext.
16672          LLD->getExtensionType() != ISD::EXTLOAD &&
16673          RLD->getExtensionType() != ISD::EXTLOAD) ||
16674         // FIXME: this discards src value information.  This is
16675         // over-conservative. It would be beneficial to be able to remember
16676         // both potential memory locations.  Since we are discarding
16677         // src value info, don't do the transformation if the memory
16678         // locations are not in the default address space.
16679         LLD->getPointerInfo().getAddrSpace() != 0 ||
16680         RLD->getPointerInfo().getAddrSpace() != 0 ||
16681         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
16682                                       LLD->getBasePtr().getValueType()))
16683       return false;
16684 
16685     // Check that the select condition doesn't reach either load.  If so,
16686     // folding this will induce a cycle into the DAG.  If not, this is safe to
16687     // xform, so create a select of the addresses.
16688     SDValue Addr;
16689     if (TheSelect->getOpcode() == ISD::SELECT) {
16690       SDNode *CondNode = TheSelect->getOperand(0).getNode();
16691       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
16692           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
16693         return false;
16694       // The loads must not depend on one another.
16695       if (LLD->isPredecessorOf(RLD) ||
16696           RLD->isPredecessorOf(LLD))
16697         return false;
16698       Addr = DAG.getSelect(SDLoc(TheSelect),
16699                            LLD->getBasePtr().getValueType(),
16700                            TheSelect->getOperand(0), LLD->getBasePtr(),
16701                            RLD->getBasePtr());
16702     } else {  // Otherwise SELECT_CC
16703       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
16704       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
16705 
16706       if ((LLD->hasAnyUseOfValue(1) &&
16707            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
16708           (RLD->hasAnyUseOfValue(1) &&
16709            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
16710         return false;
16711 
16712       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
16713                          LLD->getBasePtr().getValueType(),
16714                          TheSelect->getOperand(0),
16715                          TheSelect->getOperand(1),
16716                          LLD->getBasePtr(), RLD->getBasePtr(),
16717                          TheSelect->getOperand(4));
16718     }
16719 
16720     SDValue Load;
16721     // It is safe to replace the two loads if they have different alignments,
16722     // but the new load must be the minimum (most restrictive) alignment of the
16723     // inputs.
16724     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
16725     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
16726     if (!RLD->isInvariant())
16727       MMOFlags &= ~MachineMemOperand::MOInvariant;
16728     if (!RLD->isDereferenceable())
16729       MMOFlags &= ~MachineMemOperand::MODereferenceable;
16730     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
16731       // FIXME: Discards pointer and AA info.
16732       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
16733                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
16734                          MMOFlags);
16735     } else {
16736       // FIXME: Discards pointer and AA info.
16737       Load = DAG.getExtLoad(
16738           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
16739                                                   : LLD->getExtensionType(),
16740           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
16741           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
16742     }
16743 
16744     // Users of the select now use the result of the load.
16745     CombineTo(TheSelect, Load);
16746 
16747     // Users of the old loads now use the new load's chain.  We know the
16748     // old-load value is dead now.
16749     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
16750     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
16751     return true;
16752   }
16753 
16754   return false;
16755 }
16756 
16757 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
16758 /// bitwise 'and'.
16759 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
16760                                             SDValue N1, SDValue N2, SDValue N3,
16761                                             ISD::CondCode CC) {
16762   // If this is a select where the false operand is zero and the compare is a
16763   // check of the sign bit, see if we can perform the "gzip trick":
16764   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
16765   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
16766   EVT XType = N0.getValueType();
16767   EVT AType = N2.getValueType();
16768   if (!isNullConstant(N3) || !XType.bitsGE(AType))
16769     return SDValue();
16770 
16771   // If the comparison is testing for a positive value, we have to invert
16772   // the sign bit mask, so only do that transform if the target has a bitwise
16773   // 'and not' instruction (the invert is free).
16774   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
16775     // (X > -1) ? A : 0
16776     // (X >  0) ? X : 0 <-- This is canonical signed max.
16777     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
16778       return SDValue();
16779   } else if (CC == ISD::SETLT) {
16780     // (X <  0) ? A : 0
16781     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
16782     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
16783       return SDValue();
16784   } else {
16785     return SDValue();
16786   }
16787 
16788   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
16789   // constant.
16790   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
16791   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
16792   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
16793     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
16794     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
16795     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
16796     AddToWorklist(Shift.getNode());
16797 
16798     if (XType.bitsGT(AType)) {
16799       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16800       AddToWorklist(Shift.getNode());
16801     }
16802 
16803     if (CC == ISD::SETGT)
16804       Shift = DAG.getNOT(DL, Shift, AType);
16805 
16806     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16807   }
16808 
16809   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
16810   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
16811   AddToWorklist(Shift.getNode());
16812 
16813   if (XType.bitsGT(AType)) {
16814     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
16815     AddToWorklist(Shift.getNode());
16816   }
16817 
16818   if (CC == ISD::SETGT)
16819     Shift = DAG.getNOT(DL, Shift, AType);
16820 
16821   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
16822 }
16823 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
/// Returns the simplified value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Pool layout is {false-value, true-value} so the select of offsets
        // below can pick element 1 when the condition is true.
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            Alignment);
      }
    }

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // Only valid when a true condition is known to materialize as exactly 1.
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    // The negated arm must be (sub 0, X) for this to be an abs pattern.
    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
17066 
17067 /// This is a stub for TargetLowering::SimplifySetCC.
17068 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
17069                                    ISD::CondCode Cond, const SDLoc &DL,
17070                                    bool foldBooleans) {
17071   TargetLowering::DAGCombinerInfo
17072     DagCombineInfo(DAG, Level, false, this);
17073   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
17074 }
17075 
17076 /// Given an ISD::SDIV node expressing a divide by constant, return
17077 /// a DAG expression to select that will generate the same value by multiplying
17078 /// by a magic number.
17079 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17080 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
17081   // when optimising for minimum size, we don't want to expand a div to a mul
17082   // and a shift.
17083   if (DAG.getMachineFunction().getFunction().optForMinSize())
17084     return SDValue();
17085 
17086   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17087   if (!C)
17088     return SDValue();
17089 
17090   // Avoid division by zero.
17091   if (C->isNullValue())
17092     return SDValue();
17093 
17094   std::vector<SDNode *> Built;
17095   SDValue S =
17096       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17097 
17098   for (SDNode *N : Built)
17099     AddToWorklist(N);
17100   return S;
17101 }
17102 
17103 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
17104 /// DAG expression that will generate the same value by right shifting.
17105 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
17106   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17107   if (!C)
17108     return SDValue();
17109 
17110   // Avoid division by zero.
17111   if (C->isNullValue())
17112     return SDValue();
17113 
17114   std::vector<SDNode *> Built;
17115   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
17116 
17117   for (SDNode *N : Built)
17118     AddToWorklist(N);
17119   return S;
17120 }
17121 
17122 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
17123 /// expression that will generate the same value by multiplying by a magic
17124 /// number.
17125 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17126 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
17127   // when optimising for minimum size, we don't want to expand a div to a mul
17128   // and a shift.
17129   if (DAG.getMachineFunction().getFunction().optForMinSize())
17130     return SDValue();
17131 
17132   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17133   if (!C)
17134     return SDValue();
17135 
17136   // Avoid division by zero.
17137   if (C->isNullValue())
17138     return SDValue();
17139 
17140   std::vector<SDNode *> Built;
17141   SDValue S =
17142       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17143 
17144   for (SDNode *N : Built)
17145     AddToWorklist(N);
17146   return S;
17147 }
17148 
17149 /// Determines the LogBase2 value for a non-null input value using the
17150 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
17151 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
17152   EVT VT = V.getValueType();
17153   unsigned EltBits = VT.getScalarSizeInBits();
17154   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
17155   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
17156   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
17157   return LogBase2;
17158 }
17159 
17160 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17161 /// For the reciprocal, we need to find the zero of the function:
17162 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
17163 ///     =>
17164 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
17165 ///     does not require additional intermediate precision]
17166 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
17167   if (Level >= AfterLegalizeDAG)
17168     return SDValue();
17169 
17170   // TODO: Handle half and/or extended types?
17171   EVT VT = Op.getValueType();
17172   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17173     return SDValue();
17174 
17175   // If estimates are explicitly disabled for this function, we're done.
17176   MachineFunction &MF = DAG.getMachineFunction();
17177   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
17178   if (Enabled == TLI.ReciprocalEstimate::Disabled)
17179     return SDValue();
17180 
17181   // Estimates may be explicitly enabled for this type with a custom number of
17182   // refinement steps.
17183   int Iterations = TLI.getDivRefinementSteps(VT, MF);
17184   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
17185     AddToWorklist(Est.getNode());
17186 
17187     if (Iterations) {
17188       EVT VT = Op.getValueType();
17189       SDLoc DL(Op);
17190       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17191 
17192       // Newton iterations: Est = Est + Est (1 - Arg * Est)
17193       for (int i = 0; i < Iterations; ++i) {
17194         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
17195         AddToWorklist(NewEst.getNode());
17196 
17197         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
17198         AddToWorklist(NewEst.getNode());
17199 
17200         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17201         AddToWorklist(NewEst.getNode());
17202 
17203         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
17204         AddToWorklist(Est.getNode());
17205       }
17206     }
17207     return Est;
17208   }
17209 
17210   return SDValue();
17211 }
17212 
17213 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17214 /// For the reciprocal sqrt, we need to find the zero of the function:
17215 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17216 ///     =>
17217 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
17218 /// As a result, we precompute A/2 prior to the iteration loop.
17219 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
17220                                          unsigned Iterations,
17221                                          SDNodeFlags Flags, bool Reciprocal) {
17222   EVT VT = Arg.getValueType();
17223   SDLoc DL(Arg);
17224   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
17225 
17226   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
17227   // this entire sequence requires only one FP constant.
17228   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
17229   AddToWorklist(HalfArg.getNode());
17230 
17231   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
17232   AddToWorklist(HalfArg.getNode());
17233 
17234   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
17235   for (unsigned i = 0; i < Iterations; ++i) {
17236     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
17237     AddToWorklist(NewEst.getNode());
17238 
17239     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
17240     AddToWorklist(NewEst.getNode());
17241 
17242     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
17243     AddToWorklist(NewEst.getNode());
17244 
17245     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17246     AddToWorklist(Est.getNode());
17247   }
17248 
17249   // If non-reciprocal square root is requested, multiply the result by Arg.
17250   if (!Reciprocal) {
17251     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
17252     AddToWorklist(Est.getNode());
17253   }
17254 
17255   return Est;
17256 }
17257 
17258 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17259 /// For the reciprocal sqrt, we need to find the zero of the function:
17260 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17261 ///     =>
17262 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
17263 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
17264                                          unsigned Iterations,
17265                                          SDNodeFlags Flags, bool Reciprocal) {
17266   EVT VT = Arg.getValueType();
17267   SDLoc DL(Arg);
17268   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
17269   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
17270 
17271   // This routine must enter the loop below to work correctly
17272   // when (Reciprocal == false).
17273   assert(Iterations > 0);
17274 
17275   // Newton iterations for reciprocal square root:
17276   // E = (E * -0.5) * ((A * E) * E + -3.0)
17277   for (unsigned i = 0; i < Iterations; ++i) {
17278     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
17279     AddToWorklist(AE.getNode());
17280 
17281     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
17282     AddToWorklist(AEE.getNode());
17283 
17284     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
17285     AddToWorklist(RHS.getNode());
17286 
17287     // When calculating a square root at the last iteration build:
17288     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
17289     // (notice a common subexpression)
17290     SDValue LHS;
17291     if (Reciprocal || (i + 1) < Iterations) {
17292       // RSQRT: LHS = (E * -0.5)
17293       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
17294     } else {
17295       // SQRT: LHS = (A * E) * -0.5
17296       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
17297     }
17298     AddToWorklist(LHS.getNode());
17299 
17300     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
17301     AddToWorklist(Est.getNode());
17302   }
17303 
17304   return Est;
17305 }
17306 
17307 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
17308 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
17309 /// Op can be zero.
17310 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
17311                                            bool Reciprocal) {
17312   if (Level >= AfterLegalizeDAG)
17313     return SDValue();
17314 
17315   // TODO: Handle half and/or extended types?
17316   EVT VT = Op.getValueType();
17317   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17318     return SDValue();
17319 
17320   // If estimates are explicitly disabled for this function, we're done.
17321   MachineFunction &MF = DAG.getMachineFunction();
17322   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
17323   if (Enabled == TLI.ReciprocalEstimate::Disabled)
17324     return SDValue();
17325 
17326   // Estimates may be explicitly enabled for this type with a custom number of
17327   // refinement steps.
17328   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
17329 
17330   bool UseOneConstNR = false;
17331   if (SDValue Est =
17332       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
17333                           Reciprocal)) {
17334     AddToWorklist(Est.getNode());
17335 
17336     if (Iterations) {
17337       Est = UseOneConstNR
17338             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
17339             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
17340 
17341       if (!Reciprocal) {
17342         // The estimate is now completely wrong if the input was exactly 0.0 or
17343         // possibly a denormal. Force the answer to 0.0 for those cases.
17344         EVT VT = Op.getValueType();
17345         SDLoc DL(Op);
17346         EVT CCVT = getSetCCResultType(VT);
17347         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
17348         const Function &F = DAG.getMachineFunction().getFunction();
17349         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
17350         if (Denorms.getValueAsString().equals("ieee")) {
17351           // fabs(X) < SmallestNormal ? 0.0 : Est
17352           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
17353           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
17354           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
17355           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
17356           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
17357           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
17358           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
17359           AddToWorklist(Fabs.getNode());
17360           AddToWorklist(IsDenorm.getNode());
17361           AddToWorklist(Est.getNode());
17362         } else {
17363           // X == 0.0 ? 0.0 : Est
17364           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
17365           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
17366           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
17367           AddToWorklist(IsZero.getNode());
17368           AddToWorklist(Est.getNode());
17369         }
17370       }
17371     }
17372     return Est;
17373   }
17374 
17375   return SDValue();
17376 }
17377 
/// Build an estimate for the reciprocal square root, 1/sqrt(Op).
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/true);
}
17381 
/// Build an estimate for the square root, sqrt(Op).
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/false);
}
17385 
/// Return true if there is any possibility that the two addresses overlap.
/// Conservative: answers "true" (may alias) unless one of the disambiguation
/// checks below can prove the accesses are disjoint.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Access sizes in bytes for the two memory operations.
  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
  int64_t PtrDiff;
  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
    // If the two addresses share a base and index and differ only by a
    // constant byte offset PtrDiff, alias iff [0, NumBytes0) overlaps
    // [PtrDiff, PtrDiff + NumBytes1).
    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
    // able to calculate their relative offset if at least one arises
    // from an alloca. However, these allocas cannot overlap and we
    // can infer there is no alias.
    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
        // If the bases are the same frame index but we couldn't find a
        // constant offset (indices are different), be conservative.
        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                       !MFI.isFixedObjectIndex(B->getIndex())))
          return false;
      }

    // Classify each base as frame index, global address, or constant pool.
    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());

    // If of mismatched base types or checkable indices we can check
    // they do not alias.
    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
      return false;
  }

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    // Positions of the accesses within an alignment-sized window.
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  // An explicit -combiner-global-alias-analysis setting overrides the
  // subtarget's preference for using alias analysis here.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    // Extend each access size so both locations are measured from MinOffset.
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
17490 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
///
/// \param N             the memory node whose chain we are improving.
/// \param OriginalChain the chain currently feeding N.
/// \param Aliases       out-parameter: the chain values N must stay ordered
///                      after. May be reset to just OriginalChain if the walk
///                      exceeds the target's depth limit.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // Non-volatile loads never alias each other, so load/load pairs can be
  // skipped over below.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Give up: keep the original chain so ordering stays conservative.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // Two non-volatile loads (IsLoad && IsOpLoad) never need ordering.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to explore; treat the whole token factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
17577 
17578 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
17579 /// (aliasing node.)
17580 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
17581   if (OptLevel == CodeGenOpt::None)
17582     return OldChain;
17583 
17584   // Ops for replacing token factor.
17585   SmallVector<SDValue, 8> Aliases;
17586 
17587   // Accumulate all the aliases to this node.
17588   GatherAllAliases(N, OldChain, Aliases);
17589 
17590   // If no operands then chain to entry token.
17591   if (Aliases.size() == 0)
17592     return DAG.getEntryNode();
17593 
17594   // If a single operand then chain to it.  We don't need to revisit it.
17595   if (Aliases.size() == 1)
17596     return Aliases[0];
17597 
17598   // Construct a custom tailored token factor.
17599   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
17600 }
17601 
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
//
// Returns true iff the chain of St itself was changed.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Volatile or indexed stores cannot be reordered.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);

    // Check that the base pointer is the same as the original one.
    if (!BasePtr.equalBaseIndex(Ptr, DAG))
      break;

    // Walk up the chain to find the next store node, ignoring any
    // intermediate loads. Any other kind of node will halt the loop.
    // Index is set to nullptr to terminate the outer loop when no
    // further candidate store is found.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        // Skip over loads; they don't end the chain of stores.
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        Index = nullptr;
        break;
      }
    } // end while
  }

  // At this point, ChainedStores lists all of the Store nodes
  // reachable by iterating up through chain nodes matching the above
  // conditions.  For each such store identified, try to find an
  // earlier chain to attach the store to which won't violate the
  // required ordering.
  bool MadeChangeToSt = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      if (ChainedStore == St)
        MadeChangeToSt = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChangeToSt;
}
17701 
17702 /// This is the entry point for the file.
17703 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
17704                            CodeGenOpt::Level OptLevel) {
17705   /// This is the main entry point to this class.
17706   DAGCombiner(*this, AA, OptLevel).Run(Level);
17707 }
17708