1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/IntervalMap.h"
24 #include "llvm/ADT/None.h"
25 #include "llvm/ADT/Optional.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallBitVector.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/ADT/SmallSet.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/Statistic.h"
33 #include "llvm/Analysis/AliasAnalysis.h"
34 #include "llvm/Analysis/MemoryLocation.h"
35 #include "llvm/CodeGen/DAGCombine.h"
36 #include "llvm/CodeGen/ISDOpcodes.h"
37 #include "llvm/CodeGen/MachineFrameInfo.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineMemOperand.h"
40 #include "llvm/CodeGen/RuntimeLibcalls.h"
41 #include "llvm/CodeGen/SelectionDAG.h"
42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
43 #include "llvm/CodeGen/SelectionDAGNodes.h"
44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetRegisterInfo.h"
47 #include "llvm/CodeGen/TargetSubtargetInfo.h"
48 #include "llvm/CodeGen/ValueTypes.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/ErrorHandling.h"
62 #include "llvm/Support/KnownBits.h"
63 #include "llvm/Support/MachineValueType.h"
64 #include "llvm/Support/MathExtras.h"
65 #include "llvm/Support/raw_ostream.h"
66 #include "llvm/Target/TargetMachine.h"
67 #include "llvm/Target/TargetOptions.h"
68 #include <algorithm>
69 #include <cassert>
70 #include <cstdint>
71 #include <functional>
72 #include <iterator>
73 #include <string>
74 #include <tuple>
75 #include <utility>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
// Statistics reported by this pass (see llvm/ADT/Statistic.h).
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

// Command-line knobs controlling the combiner's alias-analysis behavior.
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
// Debug-build-only knob: restrict combiner alias analysis to one function.
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));
114 
115 namespace {
116 
  /// Worklist-driven combiner over a SelectionDAG. Nodes are visited from a
  /// worklist; each visit may fold the node, which in turn re-queues its
  /// users and operands for further simplification.
  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;

    /// The legalization phase this combine run is operating in (set to
    /// BeforeLegalizeTypes in the constructor).
    CombineLevel Level;

    /// Optimization level the combiner was invoked with.
    CodeGenOpt::Level OptLevel;

    /// When true, only operations legal for the target may be created.
    bool LegalOperations = false;

    /// When true, only types legal for the target may be created.
    bool LegalTypes = false;

    /// True when the containing function is optimized for size (set from
    /// Function::optForSize() in the constructor).
    bool ForCodeSize;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// AA - Used for DAG load/store alias analysis. May be null.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Construct a combiner over \p D. Also precomputes
    /// MaximumLegalStoreInBits from the target's legal simple value types.
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

      // Find the widest legal store type available on this target.
      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    /// Size in bits of the widest legal store type on this target; computed
    /// once in the constructor.
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt Demanded = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitFMINIMUM(SDNode *N);
    SDValue visitFMAXIMUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    // Multi-node combines that try to form fused multiply-add patterns.
    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up store chain add additional
    // chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary.  \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    /// candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      if (LegalOperations)
        return TLI.isOperationLegal(Opcode, VT);
      return TLI.isOperationLegalOrCustom(Opcode, VT);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };
626 
627 /// This class is a DAGUpdateListener that removes any deleted
628 /// nodes from the worklist.
629 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
630   DAGCombiner &DC;
631 
632 public:
633   explicit WorklistRemover(DAGCombiner &dc)
634     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
635 
636   void NodeDeleted(SDNode *N, SDNode *E) override {
637     DC.removeFromWorklist(N);
638   }
639 };
640 
641 } // end anonymous namespace
642 
643 //===----------------------------------------------------------------------===//
644 //  TargetLowering::DAGCombinerInfo implementation
645 //===----------------------------------------------------------------------===//
646 
647 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
648   ((DAGCombiner*)DC)->AddToWorklist(N);
649 }
650 
651 SDValue TargetLowering::DAGCombinerInfo::
652 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
653   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
654 }
655 
656 SDValue TargetLowering::DAGCombinerInfo::
657 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
658   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
659 }
660 
661 SDValue TargetLowering::DAGCombinerInfo::
662 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
663   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
664 }
665 
666 void TargetLowering::DAGCombinerInfo::
667 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
668   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
669 }
670 
671 //===----------------------------------------------------------------------===//
672 // Helper Functions
673 //===----------------------------------------------------------------------===//
674 
675 void DAGCombiner::deleteAndRecombine(SDNode *N) {
676   removeFromWorklist(N);
677 
678   // If the operands of this node are only used by the node, they will now be
679   // dead. Make sure to re-visit them and recursively delete dead nodes.
680   for (const SDValue &Op : N->ops())
681     // For an operand generating multiple values, one of the values may
682     // become dead allowing further simplification (e.g. split index
683     // arithmetic from an indexed load).
684     if (Op->hasOneUse() || Op->getNumValues() > 1)
685       AddToWorklist(Op.getNode());
686 
687   DAG.DeleteNode(N);
688 }
689 
690 /// Return 1 if we can compute the negated form of the specified expression for
691 /// the same cost as the expression itself, or 2 if we can compute the negated
692 /// form more cheaply than the expression itself.
693 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
694                                const TargetLowering &TLI,
695                                const TargetOptions *Options,
696                                unsigned Depth = 0) {
697   // fneg is removable even if it has multiple uses.
698   if (Op.getOpcode() == ISD::FNEG) return 2;
699 
700   // Don't allow anything with multiple uses unless we know it is free.
701   EVT VT = Op.getValueType();
702   const SDNodeFlags Flags = Op->getFlags();
703   if (!Op.hasOneUse())
704     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
705           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
706       return 0;
707 
708   // Don't recurse exponentially.
709   if (Depth > 6) return 0;
710 
711   switch (Op.getOpcode()) {
712   default: return false;
713   case ISD::ConstantFP: {
714     if (!LegalOperations)
715       return 1;
716 
717     // Don't invert constant FP values after legalization unless the target says
718     // the negated constant is legal.
719     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
720       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
721   }
722   case ISD::FADD:
723     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
724       return 0;
725 
726     // After operation legalization, it might not be legal to create new FSUBs.
727     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
728       return 0;
729 
730     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
731     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
732                                     Options, Depth + 1))
733       return V;
734     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
735     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
736                               Depth + 1);
737   case ISD::FSUB:
738     // We can't turn -(A-B) into B-A when we honor signed zeros.
739     if (!Options->NoSignedZerosFPMath &&
740         !Flags.hasNoSignedZeros())
741       return 0;
742 
743     // fold (fneg (fsub A, B)) -> (fsub B, A)
744     return 1;
745 
746   case ISD::FMUL:
747   case ISD::FDIV:
748     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
749     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
750                                     Options, Depth + 1))
751       return V;
752 
753     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
754                               Depth + 1);
755 
756   case ISD::FP_EXTEND:
757   case ISD::FP_ROUND:
758   case ISD::FSIN:
759     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
760                               Depth + 1);
761   }
762 }
763 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// This must be kept in lock-step with isNegatibleForFree: it handles exactly
/// the opcodes that function reports as free to negate, and the Depth
/// assertion below enforces the shared recursion bound.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Carry the original node's flags (e.g. fast-math flags) onto rebuilt nodes.
  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly by flipping its sign.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // isNegatibleForFree only accepted FADD under these flags; re-assert.
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Push the negation through the unary op onto its operand.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND has a second operand that must be carried through unchanged
      // (presumably the truncation flag -- verify against ISDOpcodes docs).
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
835 
836 // APInts must be the same size for most operations, this helper
837 // function zero extends the shorter of the pair so that they match.
838 // We provide an Offset so that we can create bitwidths that won't overflow.
839 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
840   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
841   LHS = LHS.zextOrSelf(Bits);
842   RHS = RHS.zextOrSelf(Bits);
843 }
844 
845 // Return true if this node is a setcc, or is a select_cc
846 // that selects between the target values used for true and false, making it
847 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
848 // the appropriate nodes based on the type of node we are checking. This
849 // simplifies life a bit for the callers.
850 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
851                                     SDValue &CC) const {
852   if (N.getOpcode() == ISD::SETCC) {
853     LHS = N.getOperand(0);
854     RHS = N.getOperand(1);
855     CC  = N.getOperand(2);
856     return true;
857   }
858 
859   if (N.getOpcode() != ISD::SELECT_CC ||
860       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
861       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
862     return false;
863 
864   if (TLI.getBooleanContents(N.getValueType()) ==
865       TargetLowering::UndefinedBooleanContent)
866     return false;
867 
868   LHS = N.getOperand(0);
869   RHS = N.getOperand(1);
870   CC  = N.getOperand(4);
871   return true;
872 }
873 
874 /// Return true if this is a SetCC-equivalent operation with only one use.
875 /// If this is true, it allows the users to invert the operation for free when
876 /// it is profitable to do so.
877 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
878   SDValue N0, N1, N2;
879   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
880     return true;
881   return false;
882 }
883 
884 // Returns the SDNode if it is a constant float BuildVector
885 // or constant float.
886 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
887   if (isa<ConstantFPSDNode>(N))
888     return N.getNode();
889   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
890     return N.getNode();
891   return nullptr;
892 }
893 
894 // Determines if it is a constant integer or a build vector of constant
895 // integers (and undefs).
896 // Do not permit build vector implicit truncation.
897 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
898   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
899     return !(Const->isOpaque() && NoOpaques);
900   if (N.getOpcode() != ISD::BUILD_VECTOR)
901     return false;
902   unsigned BitWidth = N.getScalarValueSizeInBits();
903   for (const SDValue &Op : N->op_values()) {
904     if (Op.isUndef())
905       continue;
906     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
907     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
908         (Const->isOpaque() && NoOpaques))
909       return false;
910   }
911   return true;
912 }
913 
914 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
915 // undef's.
916 static bool isAnyConstantBuildVector(const SDNode *N) {
917   return ISD::isBuildVectorOfConstantSDNodes(N) ||
918          ISD::isBuildVectorOfConstantFPSDNodes(N);
919 }
920 
/// Try to reassociate the commutative/associative binary operation \p Opc
/// applied to (N0, N1) so that constant operands can be folded together or
/// hoisted outward. Two symmetric cases are handled: the inner op on the
/// left (N0) and the inner op on the right (N1). Returns the reassociated
/// value, or an empty SDValue if no transform applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  // Case 1: the inner op is on the left, i.e. (op (op x, c1), N1).
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both operands were constants but didn't fold; give up rather than
        // reshuffle fruitlessly.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Case 2: mirror image -- the inner op is on the right, i.e.
  // (op N0, (op x, c1)).
  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
970 
/// Replace all NumTo results of \p N with the values in \p To, keep the
/// worklist up to date (when \p AddTo), and delete \p N if the replacement
/// left it dead. Returns SDValue(N, 0), which callers use as the "combined
/// in place" sentinel recognized by DAGCombiner::Run.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Each replacement value must match the type of the result it replaces
  // (null entries are allowed for unused results).
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // RAUW may CSE-delete nodes; the remover keeps our worklist consistent.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1002 
/// Apply a replacement recorded by target lowering (TLO.Old -> TLO.New) to
/// the DAG and keep the combiner's worklist consistent with it.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1020 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, commit the
/// replacement recorded by target lowering and return true; otherwise return
/// false and leave the DAG untouched.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  // Ask target lowering to simplify; on success TLO.Old/TLO.New describe the
  // replacement to perform.
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1041 
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, commit the replacement and return true. \p Demanded is the element
/// mask; \p AssumeSingleUse lets the target simplify as if Op had one use.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  // On success TLO.Old/TLO.New record the replacement to perform below.
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
                                      0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1065 
/// Replace \p Load with \p ExtLoad, which loads the same memory at a wider
/// type: the original data result (value 0) becomes a truncate of the wide
/// load, and the chain result (value 1) is rewired to the wide load's chain.
/// The original load is deleted afterwards.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  // Truncate the wide result back to the original type for existing users.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1079 
/// Produce a version of \p Op whose value is available in the wider type
/// \p PVT. Sets \p Replace to true when a new extending load was created
/// that must later replace the original load (via
/// ReplaceLoadWithPromotedLoad); otherwise leaves it false. Returns an empty
/// SDValue when no suitable promotion exists.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes EXTLOAD (any-extend); an existing
    // extending load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Re-assert the sign-extension on the promoted operand.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // Byte-sized constants are sign-extended, others zero-extended --
    // presumably to keep the promoted immediate cheap to materialize on the
    // target; verify against IsDesirableToPromoteOp users.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fallback: any-extend the value, but only if ANY_EXTEND to PVT is legal.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1116 
/// Promote \p Op to type \p PVT with well-defined high bits by re-sign-
/// extending from the original width via SIGN_EXTEND_INREG. Returns an empty
/// SDValue if the promotion (or SIGN_EXTEND_INREG at PVT) is not available.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand created a replacement ext-load; swap it in for the
  // original load before building on top of it.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  // Re-establish the sign bits from the original (narrow) type.
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1133 
/// Promote \p Op to type \p PVT with zeroed high bits by masking back to the
/// original width. Mirror of SExtPromoteOperand for the zero-extend case.
/// NOTE(review): unlike SExtPromoteOperand there is no up-front legality
/// check here -- presumably getZeroExtendInReg lowers to a plain AND mask
/// that is always available; verify.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand created a replacement ext-load; swap it in for the
  // original load before building on top of it.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1147 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer. Returns Op on success (the combine
/// was applied via CombineTo), or an empty SDValue if no promotion happened.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion is only considered once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integers are promoted.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the op at the wide type, then truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    // Replace any remaining uses of the original loads with the promoted
    // loads created by PromoteOperand.
    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    // Returning Op (== the input) signals "combined in place" to Run.
    return Op;
  }
  return SDValue();
}
1212 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer. Only the shifted value (operand 0)
/// is promoted; the shift amount is used unchanged.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integers are promoted.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // The extension must match the shift: SRA needs correct sign bits,
    // SRL needs zeroed high bits, SHL can take anything (low bits only).
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift at the wide type, then truncate back to VT.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1265 
1266 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1267   if (!LegalOperations)
1268     return SDValue();
1269 
1270   EVT VT = Op.getValueType();
1271   if (VT.isVector() || !VT.isInteger())
1272     return SDValue();
1273 
1274   // If operation type is 'undesirable', e.g. i16 on x86, consider
1275   // promoting it.
1276   unsigned Opc = Op.getOpcode();
1277   if (TLI.isTypeDesirableForOp(Opc, VT))
1278     return SDValue();
1279 
1280   EVT PVT = VT;
1281   // Consult target whether it is a good idea to promote this operation and
1282   // what's the right type to promote it to.
1283   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1284     assert(PVT != VT && "Don't know what type to promote to!");
1285     // fold (aext (aext x)) -> (aext x)
1286     // fold (aext (zext x)) -> (zext x)
1287     // fold (aext (sext x)) -> (sext x)
1288     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1289     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1290   }
1291   return SDValue();
1292 }
1293 
/// Promote an unindexed scalar-integer load to a wider type when the target
/// says the current type is undesirable: build an extending load at the
/// promoted type, replace the original load's data result with a truncate of
/// it and its chain with the new load's chain. Returns true if the load was
/// promoted.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  // Only plain (unindexed) loads are handled.
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  // Only scalar integers are promoted.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes EXTLOAD (any-extend); an extending load
    // keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    // Existing users still want the narrow type; truncate back for them.
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1339 
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
///
/// Returns true if \p N itself was deleted (i.e. it had no uses on entry).
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // Worklist of candidate nodes; the SetVector dedupes shared operands.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    // Defensive: tolerate null entries in the set.
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue its operands as new candidates, then delete it.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still live, but it just lost a user -- revisit it for new combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1369 
1370 //===----------------------------------------------------------------------===//
1371 //  Main DAG Combiner implementation
1372 //===----------------------------------------------------------------------===//
1373 
/// Main driver: repeatedly pop nodes off the worklist and try to combine
/// them until the worklist is empty, then prune dead nodes and update the
/// DAG root. \p AtLevel selects which legalization phase we are running in,
/// which gates what new nodes the visit routines may create.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  // WorklistMap (not the vector) is the source of truth for emptiness,
  // since removed entries leave nulls behind in the vector.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Everything legalization touched needs to be revisited.
      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced N entirely; nothing left to combine here.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // Empty result: no combine applied to this node.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Replace N with the result: all values at once when the counts match,
    // otherwise the single-value form (only legal for one-result nodes).
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1477 
/// Dispatch \p N to its opcode-specific combine routine. Returns the
/// replacement value chosen by the visitor, or an empty SDValue when the
/// opcode has no dedicated visitor (or the visitor found nothing to fold).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  // Signed and unsigned remainder share one visitor.
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  // All four integer min/max flavors share one visitor.
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  // Both rotate directions share one visitor.
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  // Sign/zero assertion nodes share one visitor.
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  // No visitor for this opcode: report "no change".
  return SDValue();
}
1586 
1587 SDValue DAGCombiner::combine(SDNode *N) {
1588   SDValue RV = visit(N);
1589 
1590   // If nothing happened, try a target-specific DAG combine.
1591   if (!RV.getNode()) {
1592     assert(N->getOpcode() != ISD::DELETED_NODE &&
1593            "Node was deleted but visit returned NULL!");
1594 
1595     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1596         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1597 
1598       // Expose the DAG combiner to the target combiner impls.
1599       TargetLowering::DAGCombinerInfo
1600         DagCombineInfo(DAG, Level, false, this);
1601 
1602       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1603     }
1604   }
1605 
1606   // If nothing happened still, try promoting the operation.
1607   if (!RV.getNode()) {
1608     switch (N->getOpcode()) {
1609     default: break;
1610     case ISD::ADD:
1611     case ISD::SUB:
1612     case ISD::MUL:
1613     case ISD::AND:
1614     case ISD::OR:
1615     case ISD::XOR:
1616       RV = PromoteIntBinOp(SDValue(N, 0));
1617       break;
1618     case ISD::SHL:
1619     case ISD::SRA:
1620     case ISD::SRL:
1621       RV = PromoteIntShiftOp(SDValue(N, 0));
1622       break;
1623     case ISD::SIGN_EXTEND:
1624     case ISD::ZERO_EXTEND:
1625     case ISD::ANY_EXTEND:
1626       RV = PromoteExtend(SDValue(N, 0));
1627       break;
1628     case ISD::LOAD:
1629       if (PromoteLoad(SDValue(N, 0)))
1630         RV = SDValue(N, 0);
1631       break;
1632     }
1633   }
1634 
1635   // If N is a commutative binary node, try eliminate it if the commuted
1636   // version is already present in the DAG.
1637   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1638       N->getNumValues() == 1) {
1639     SDValue N0 = N->getOperand(0);
1640     SDValue N1 = N->getOperand(1);
1641 
1642     // Constant operands are canonicalized to RHS.
1643     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1644       SDValue Ops[] = {N1, N0};
1645       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1646                                             N->getFlags());
1647       if (CSENode)
1648         return SDValue(CSENode, 0);
1649     }
1650   }
1651 
1652   return RV;
1653 }
1654 
1655 /// Given a node, return its input chain if it has one, otherwise return a null
1656 /// sd operand.
1657 static SDValue getInputChainForNode(SDNode *N) {
1658   if (unsigned NumOps = N->getNumOperands()) {
1659     if (N->getOperand(0).getValueType() == MVT::Other)
1660       return N->getOperand(0);
1661     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1662       return N->getOperand(NumOps-1);
1663     for (unsigned i = 1; i < NumOps-1; ++i)
1664       if (N->getOperand(i).getValueType() == MVT::Other)
1665         return N->getOperand(i);
1666   }
1667   return SDValue();
1668 }
1669 
/// Simplify a TokenFactor node: strip redundant chains, flatten nested
/// token factors, and prune operands that are transitively reachable through
/// another operand's chain. Returns the replacement chain, or an empty
/// SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        // Only flatten single-use nested token factors; a multi-use one must
        // stay intact for its other users, so it is treated as an ordinary
        // operand below.
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  // Seed the search: each operand starts its own chain walk, tagged with its
  // index so we can tell whose search reached a given node.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      // The pruned operand's outstanding work is inherited by the current
      // operand's search.
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Walk at most 1024 worklist entries to bound compile time on huge chains.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for any pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // These forward their chain in operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory operations expose their chain explicitly; anything else ends
      // this branch of the walk.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that no other operand's chain walk reached.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1831 
1832 /// MERGE_VALUES can always be eliminated.
1833 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1834   WorklistRemover DeadNodes(*this);
1835   // Replacing results may cause a different MERGE_VALUES to suddenly
1836   // be CSE'd with N, and carry its uses with it. Iterate until no
1837   // uses remain, to ensure that the node can be safely deleted.
1838   // First add the users of this node to the work list so that they
1839   // can be tried again once they have new operands.
1840   AddUsersToWorklist(N);
1841   do {
1842     // Do as a single replacement to avoid rewalking use lists.
1843     SmallVector<SDValue, 8> Ops;
1844     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1845       Ops.push_back(N->getOperand(i));
1846     DAG.ReplaceAllUsesWith(N, Ops.data());
1847   } while (!N->use_empty());
1848   deleteAndRecombine(N);
1849   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1850 }
1851 
1852 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1853 /// ConstantSDNode pointer else nullptr.
1854 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1855   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1856   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1857 }
1858 
/// Try to eliminate a binary operator whose other operand is a one-use
/// select-of-constants by pulling the operation into both select arms:
///   binop (select Cond, CT, CF), CBO --> select Cond, CT*CBO, CF*CBO
/// Only fires when both new arms fold to constants (or, for and/or with
/// 0/-1 arms, when a non-constant operand can be propagated safely).
/// Returns the new select, or an empty SDValue if the fold does not apply.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    // The select may be on either side of the binop; try the RHS.
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be (integer or FP) constants.
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  // CBO is the binop operand that is not the select.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  // Preserve operand order when the select was the RHS of the binop.
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}
1933 
1934 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1935   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1936          "Expecting add or sub");
1937 
1938   // Match a constant operand and a zext operand for the math instruction:
1939   // add Z, C
1940   // sub C, Z
1941   bool IsAdd = N->getOpcode() == ISD::ADD;
1942   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1943   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1944   auto *CN = dyn_cast<ConstantSDNode>(C);
1945   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1946     return SDValue();
1947 
1948   // Match the zext operand as a setcc of a boolean.
1949   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1950       Z.getOperand(0).getValueType() != MVT::i1)
1951     return SDValue();
1952 
1953   // Match the compare as: setcc (X & 1), 0, eq.
1954   SDValue SetCC = Z.getOperand(0);
1955   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1956   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1957       SetCC.getOperand(0).getOpcode() != ISD::AND ||
1958       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1959     return SDValue();
1960 
1961   // We are adding/subtracting a constant and an inverted low bit. Turn that
1962   // into a subtract/add of the low bit with incremented/decremented constant:
1963   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1964   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1965   EVT VT = C.getValueType();
1966   SDLoc DL(N);
1967   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1968   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1969                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1970   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1971 }
1972 
1973 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1974 /// a shift and add with a different constant.
1975 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1976   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1977          "Expecting add or sub");
1978 
1979   // We need a constant operand for the add/sub, and the other operand is a
1980   // logical shift right: add (srl), C or sub C, (srl).
1981   bool IsAdd = N->getOpcode() == ISD::ADD;
1982   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1983   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1984   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1985   if (!C || ShiftOp.getOpcode() != ISD::SRL)
1986     return SDValue();
1987 
1988   // The shift must be of a 'not' value.
1989   SDValue Not = ShiftOp.getOperand(0);
1990   if (!Not.hasOneUse() || !isBitwiseNot(Not))
1991     return SDValue();
1992 
1993   // The shift must be moving the sign bit to the least-significant-bit.
1994   EVT VT = ShiftOp.getValueType();
1995   SDValue ShAmt = ShiftOp.getOperand(1);
1996   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
1997   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
1998     return SDValue();
1999 
2000   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2001   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2002   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2003   SDLoc DL(N);
2004   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2005   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2006   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2007   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2008 }
2009 
/// Combine patterns rooted at an ISD::ADD node. Each fold below either
/// returns a replacement value or falls through to the next pattern; an
/// empty SDValue return means no fold applied.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // Returning SDValue(N, 0) signals that N was simplified in place (see the
  // caller's RV.getNode() == N handling).
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  // Try the asymmetric folds with the operands in both orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2166 
2167 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2168   bool Masked = false;
2169 
2170   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2171   while (true) {
2172     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2173       V = V.getOperand(0);
2174       continue;
2175     }
2176 
2177     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2178       Masked = true;
2179       V = V.getOperand(0);
2180       continue;
2181     }
2182 
2183     break;
2184   }
2185 
2186   // If this is not a carry, return.
2187   if (V.getResNo() != 1)
2188     return SDValue();
2189 
2190   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2191       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2192     return SDValue();
2193 
2194   // If the result is masked, then no matter what kind of bool it is we can
2195   // return. If it isn't, then we need to make sure the bool type is either 0 or
2196   // 1 and not other values.
2197   if (Masked ||
2198       TLI.getBooleanContents(V.getValueType()) ==
2199           TargetLoweringBase::ZeroOrOneBooleanContent)
2200     return V;
2201 
2202   return SDValue();
2203 }
2204 
/// Folds for ADD that are not symmetric in the operands; visitADD calls this
/// twice with (N0, N1) and (N1, N0). \p LocReference only supplies the debug
/// location for newly created nodes.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-zeros or all-ones.
    if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  // (sext i1 is 0 or -1, so adding it equals subtracting the zext form.)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2261 
2262 SDValue DAGCombiner::visitADDC(SDNode *N) {
2263   SDValue N0 = N->getOperand(0);
2264   SDValue N1 = N->getOperand(1);
2265   EVT VT = N0.getValueType();
2266   SDLoc DL(N);
2267 
2268   // If the flag result is dead, turn this into an ADD.
2269   if (!N->hasAnyUseOfValue(1))
2270     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2271                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2272 
2273   // canonicalize constant to RHS.
2274   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2275   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2276   if (N0C && !N1C)
2277     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2278 
2279   // fold (addc x, 0) -> x + no carry out
2280   if (isNullConstant(N1))
2281     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2282                                         DL, MVT::Glue));
2283 
2284   // If it cannot overflow, transform into an add.
2285   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2286     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2287                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2288 
2289   return SDValue();
2290 }
2291 
2292 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2293                            SelectionDAG &DAG, const TargetLowering &TLI) {
2294   SDValue Cst;
2295   switch (TLI.getBooleanContents(VT)) {
2296   case TargetLowering::ZeroOrOneBooleanContent:
2297   case TargetLowering::UndefinedBooleanContent:
2298     Cst = DAG.getConstant(1, DL, VT);
2299     break;
2300   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2301     Cst = DAG.getConstant(-1, DL, VT);
2302     break;
2303   }
2304 
2305   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2306 }
2307 
2308 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2309   if (V.getOpcode() != ISD::XOR) return false;
2310   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2311   if (!Const) return false;
2312 
2313   switch(TLI.getBooleanContents(VT)) {
2314     case TargetLowering::ZeroOrOneBooleanContent:
2315       return Const->isOne();
2316     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2317       return Const->isAllOnesValue();
2318     case TargetLowering::UndefinedBooleanContent:
2319       return (Const->getAPIntValue() & 0x01) == 1;
2320   }
2321   llvm_unreachable("Unsupported boolean content");
2322 }
2323 
// Combine an ISD::UADDO node (unsigned add producing an overflow flag in
// result value 1). Vector UADDO is left untouched.
SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

  // fold (uaddo x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
  // (~a + 1) is the two's-complement negation of a, so compute 0 - a; the
  // carry-out of the add is the inverse of the borrow of the sub.
  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                              DAG.getConstant(0, DL, VT),
                              N0.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Try the operand-order-sensitive folds with both operand orders.
  if (SDValue Combined = visitUADDOLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitUADDOLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2371 
2372 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2373   auto VT = N0.getValueType();
2374 
2375   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2376   // If Y + 1 cannot overflow.
2377   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2378     SDValue Y = N1.getOperand(0);
2379     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2380     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2381       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2382                          N1.getOperand(2));
2383   }
2384 
2385   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2386   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2387     if (SDValue Carry = getAsCarry(TLI, N1))
2388       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2389                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2390 
2391   return SDValue();
2392 }
2393 
2394 SDValue DAGCombiner::visitADDE(SDNode *N) {
2395   SDValue N0 = N->getOperand(0);
2396   SDValue N1 = N->getOperand(1);
2397   SDValue CarryIn = N->getOperand(2);
2398 
2399   // canonicalize constant to RHS
2400   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2401   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2402   if (N0C && !N1C)
2403     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2404                        N1, N0, CarryIn);
2405 
2406   // fold (adde x, y, false) -> (addc x, y)
2407   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2408     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2409 
2410   return SDValue();
2411 }
2412 
// Combine an ISD::ADDCARRY node (add with boolean carry-in, producing a sum
// and a boolean carry-out).
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  EVT CarryVT = CarryIn.getValueType();

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  // The sum is just the carry-in normalized to 0/1 in the result type.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
  // ~a + !b == (0 - a) - b with the carry-out inverted into a borrow-out.
  if (isBitwiseNot(N0) && isNullConstant(N1) &&
      isBooleanFlip(CarryIn, CarryVT, TLI)) {
    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
                              DAG.getConstant(0, DL, N0.getValueType()),
                              N0.getOperand(0), CarryIn.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Try the operand-order-sensitive folds with both operand orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2462 
// ADDCARRY folds that depend on operand order; visitADDCARRY calls this with
// both (N0, N1) orders.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    // Match the diamond: Y is the carry of (uaddo A, B), and CarryIn is the
    // carry of an addcarry whose first operand is that same uaddo's sum.
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      // Fold Z into the inner add, then chain the carries linearly.
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}
2510 
2511 // Since it may not be valid to emit a fold to zero for vector initializers
2512 // check if we can before folding.
2513 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2514                              SelectionDAG &DAG, bool LegalOperations) {
2515   if (!VT.isVector())
2516     return DAG.getConstant(0, DL, VT);
2517   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2518     return DAG.getConstant(0, DL, VT);
2519   return SDValue();
2520 }
2521 
// Combine an ISD::SUB node. Folds are tried in order; each either returns a
// replacement value or falls through to the next pattern.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Only non-opaque constants are eligible for folding into the negated form.
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // Folds for negation: (sub 0, X).
  if (isNullOrNullSplat(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesOrAllOnesSplat(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  // The negation may appear on either multiplicand.
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      // The xor may have X and the sign-splat in either order.
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // (sextinreg Y i1) is 0/-1, so subtracting it equals adding its low bit.
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  return SDValue();
}
2728 
2729 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2730   SDValue N0 = N->getOperand(0);
2731   SDValue N1 = N->getOperand(1);
2732   EVT VT = N0.getValueType();
2733   SDLoc DL(N);
2734 
2735   // If the flag result is dead, turn this into an SUB.
2736   if (!N->hasAnyUseOfValue(1))
2737     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2738                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2739 
2740   // fold (subc x, x) -> 0 + no borrow
2741   if (N0 == N1)
2742     return CombineTo(N, DAG.getConstant(0, DL, VT),
2743                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2744 
2745   // fold (subc x, 0) -> x + no borrow
2746   if (isNullConstant(N1))
2747     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2748 
2749   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2750   if (isAllOnesConstant(N0))
2751     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2752                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2753 
2754   return SDValue();
2755 }
2756 
2757 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2758   SDValue N0 = N->getOperand(0);
2759   SDValue N1 = N->getOperand(1);
2760   EVT VT = N0.getValueType();
2761   if (VT.isVector())
2762     return SDValue();
2763 
2764   EVT CarryVT = N->getValueType(1);
2765   SDLoc DL(N);
2766 
2767   // If the flag result is dead, turn this into an SUB.
2768   if (!N->hasAnyUseOfValue(1))
2769     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2770                      DAG.getUNDEF(CarryVT));
2771 
2772   // fold (usubo x, x) -> 0 + no borrow
2773   if (N0 == N1)
2774     return CombineTo(N, DAG.getConstant(0, DL, VT),
2775                      DAG.getConstant(0, DL, CarryVT));
2776 
2777   // fold (usubo x, 0) -> x + no borrow
2778   if (isNullConstant(N1))
2779     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2780 
2781   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2782   if (isAllOnesConstant(N0))
2783     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2784                      DAG.getConstant(0, DL, CarryVT));
2785 
2786   return SDValue();
2787 }
2788 
2789 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2790   SDValue N0 = N->getOperand(0);
2791   SDValue N1 = N->getOperand(1);
2792   SDValue CarryIn = N->getOperand(2);
2793 
2794   // fold (sube x, y, false) -> (subc x, y)
2795   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2796     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2797 
2798   return SDValue();
2799 }
2800 
2801 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2802   SDValue N0 = N->getOperand(0);
2803   SDValue N1 = N->getOperand(1);
2804   SDValue CarryIn = N->getOperand(2);
2805 
2806   // fold (subcarry x, y, false) -> (usubo x, y)
2807   if (isNullConstant(CarryIn)) {
2808     if (!LegalOperations ||
2809         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2810       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2811   }
2812 
2813   return SDValue();
2814 }
2815 
// Combine an ISD::MUL node. Handles constant folding, canonicalization, and
// strength reduction of multiplies by constants into shifts and add/sub.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Track whether each operand is a (splat) constant and, if so, its value
  // and opaqueness; opaque constants must not be folded.
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
                                          : (MulC + 1).logBase2();
      assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
             "Not expecting multiply-by-constant that could have simplified");
      SDLoc DL(N);
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
                                DAG.getConstant(ShAmt, DL, VT));
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      // Negate the result for negative multipliers.
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
2985 
2986 /// Return true if divmod libcall is available.
2987 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2988                                      const TargetLowering &TLI) {
2989   RTLIB::Libcall LC;
2990   EVT NodeType = Node->getValueType(0);
2991   if (!NodeType.isSimple())
2992     return false;
2993   switch (NodeType.getSimpleVT().SimpleTy) {
2994   default: return false; // No libcall for vector types.
2995   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2996   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2997   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2998   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2999   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3000   }
3001 
3002   return TLI.getLibcallName(LC) != nullptr;
3003 }
3004 
/// Issue divrem if both quotient and remainder are needed.
///
/// Tries to replace this DIV/REM node (and any sibling nodes computing the
/// matching quotient/remainder of the same operands) with a single
/// [SU]DIVREM node. Returns the combined node's value, or an empty SDValue
/// if the transform does not apply.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    // This node is a division; the "other" opcode is the matching remainder.
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    // This node is a remainder; the "other" opcode is the matching division.
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk all other uses of the dividend looking for DIV/REM/DIVREM nodes
  // with the same two operands, so that they can all share one DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Skip this node itself, deleted nodes, and dead users.
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Create (or reuse) the shared DIVREM node on first match.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Quotient users get result 0; remainder users get result 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
3075 
3076 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3077   SDValue N0 = N->getOperand(0);
3078   SDValue N1 = N->getOperand(1);
3079   EVT VT = N->getValueType(0);
3080   SDLoc DL(N);
3081 
3082   unsigned Opc = N->getOpcode();
3083   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3084   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3085 
3086   // X / undef -> undef
3087   // X % undef -> undef
3088   // X / 0 -> undef
3089   // X % 0 -> undef
3090   // NOTE: This includes vectors where any divisor element is zero/undef.
3091   if (DAG.isUndef(Opc, {N0, N1}))
3092     return DAG.getUNDEF(VT);
3093 
3094   // undef / X -> 0
3095   // undef % X -> 0
3096   if (N0.isUndef())
3097     return DAG.getConstant(0, DL, VT);
3098 
3099   // 0 / X -> 0
3100   // 0 % X -> 0
3101   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3102   if (N0C && N0C->isNullValue())
3103     return N0;
3104 
3105   // X / X -> 1
3106   // X % X -> 0
3107   if (N0 == N1)
3108     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3109 
3110   // X / 1 -> X
3111   // X % 1 -> 0
3112   // If this is a boolean op (single-bit element type), we can't have
3113   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3114   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3115   // it's a 1.
3116   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3117     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3118 
3119   return SDValue();
3120 }
3121 
/// Combine an ISD::SDIV node. The fold order below is deliberate; later
/// folds assume the earlier ones did not fire.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  // Only X == MIN_SIGNED divides to 1; everything else rounds toward zero
  // to 0.
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  // Generic div/rem identities (X/1, X/X, 0/X, undef cases, ...).
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  // Shared sdiv lowering (pow2 expansion, multiply-by-magic-constant);
  // also used by visitREM to build X - (X/C)*C.
  if (SDValue V = visitSDIVLike(N0, N1, N))
    return V;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3173 
/// Lower sdiv-by-constant patterns. Operands are passed explicitly (rather
/// than read from N) so visitREM can reuse this for its X - (X/C)*C rewrite.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    // Negated powers of two are also handled (result is negated at the end).
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    // C1 = log2(|divisor|); Inexact = BitWidth - C1.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    // Bail out unless the shift amounts constant-folded; otherwise the
    // expansion below would be more expensive than it is worth.
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // This biases a negative dividend so the arithmetic shift below rounds
    // toward zero instead of toward negative infinity.
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    // (Both can occur for non-splat vector divisors.)
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
3254 
/// Combine an ISD::UDIV node. The fold order below is deliberate; later
/// folds assume the earlier ones did not fire.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  // Unsigned -1 is the maximum value, so only X == -1 divides to 1.
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  // Generic div/rem identities (X/1, X/X, 0/X, undef cases, ...).
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Shared udiv lowering (shift for pow2, multiply-by-magic-constant);
  // also used by visitREM to build X - (X/C)*C.
  if (SDValue V = visitUDIVLike(N0, N1, N))
    return V;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3300 
/// Lower udiv-by-constant patterns. Operands are passed explicitly (rather
/// than read from N) so visitREM can reuse this for its X - (X/C)*C rewrite.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The shift amount may need a narrower/wider type than VT.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      // Build the combined shift amount log2(c)+y in the SHL's amount type.
      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  // Expensive-division targets get the multiply-by-magic-constant expansion.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}
3343 
// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, x)
  // Unsigned -1 is the maximum value: X % -1 is 0 when X == -1, else X.
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  // Generic div/rem identities (X%1, X%X, 0%X, undef cases, ...).
  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      // (shl pow2, y) is still a power of two, so the same mask trick works.
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem (and the unsigned equivalent); this node is the
  // remainder, so take result value 1.
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
3421 
/// Combine an ISD::MULHS (signed multiply-high) node.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is 0 or -1 depending on the sign of x.
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      // Shift the high half down, then truncate back to VT.
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
3468 
3469 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3470   SDValue N0 = N->getOperand(0);
3471   SDValue N1 = N->getOperand(1);
3472   EVT VT = N->getValueType(0);
3473   SDLoc DL(N);
3474 
3475   if (VT.isVector()) {
3476     // fold (mulhu x, 0) -> 0
3477     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3478       return N1;
3479     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3480       return N0;
3481   }
3482 
3483   // fold (mulhu x, 0) -> 0
3484   if (isNullConstant(N1))
3485     return N1;
3486   // fold (mulhu x, 1) -> 0
3487   if (isOneConstant(N1))
3488     return DAG.getConstant(0, DL, N0.getValueType());
3489   // fold (mulhu x, undef) -> 0
3490   if (N0.isUndef() || N1.isUndef())
3491     return DAG.getConstant(0, DL, VT);
3492 
3493   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3494   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3495       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3496     SDLoc DL(N);
3497     unsigned NumEltBits = VT.getScalarSizeInBits();
3498     SDValue LogBase2 = BuildLogBase2(N1, DL);
3499     SDValue SRLAmt = DAG.getNode(
3500         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3501     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3502     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3503     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3504   }
3505 
3506   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3507   // plus a shift.
3508   if (VT.isSimple() && !VT.isVector()) {
3509     MVT Simple = VT.getSimpleVT();
3510     unsigned SimpleSize = Simple.getSizeInBits();
3511     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3512     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3513       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3514       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3515       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3516       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3517             DAG.getConstant(SimpleSize, DL,
3518                             getShiftAmountTy(N1.getValueType())));
3519       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3520     }
3521   }
3522 
3523   return SDValue();
3524 }
3525 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // Replace both results with the single-result node; the dead high
    // result gets the same value, which is harmless since it is unused.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the single-result node and see if the combiner
    // can simplify it; only commit if it actually folded to something new.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative-simplify approach for the high half.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3574 
/// Combine an ISD::SMUL_LOHI node (signed multiply producing low and high
/// halves as separate results).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1) by shifting the wide product down.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0) by truncating the wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3605 
3606 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3607   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3608     return Res;
3609 
3610   EVT VT = N->getValueType(0);
3611   SDLoc DL(N);
3612 
3613   // If the type is twice as wide is legal, transform the mulhu to a wider
3614   // multiply plus a shift.
3615   if (VT.isSimple() && !VT.isVector()) {
3616     MVT Simple = VT.getSimpleVT();
3617     unsigned SimpleSize = Simple.getSizeInBits();
3618     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3619     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3620       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3621       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3622       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3623       // Compute the high part as N1.
3624       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3625             DAG.getConstant(SimpleSize, DL,
3626                             getShiftAmountTy(Lo.getValueType())));
3627       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3628       // Compute the low part as N0.
3629       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3630       return CombineTo(N, Lo, Hi);
3631     }
3632   }
3633 
3634   return SDValue();
3635 }
3636 
3637 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3638   // (smulo x, 2) -> (saddo x, x)
3639   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3640     if (C2->getAPIntValue() == 2)
3641       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3642                          N->getOperand(0), N->getOperand(0));
3643 
3644   return SDValue();
3645 }
3646 
3647 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3648   // (umulo x, 2) -> (uaddo x, x)
3649   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3650     if (C2->getAPIntValue() == 2)
3651       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3652                          N->getOperand(0), N->getOperand(0));
3653 
3654   return SDValue();
3655 }
3656 
/// Combine an integer min/max node (ISD::SMIN/SMAX/UMIN/UMAX).
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX
  // (the signed and unsigned orderings agree for non-negative values).
  // Only do this if the current op isn't legal and the flipped is.
  unsigned Opcode = N->getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}
3699 
3700 /// If this is a binary operator with two operands of the same opcode, try to
3701 /// simplify it.
3702 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3703   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3704   EVT VT = N0.getValueType();
3705   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3706 
3707   // Bail early if none of these transforms apply.
3708   if (N0.getNumOperands() == 0) return SDValue();
3709 
3710   // For each of OP in AND/OR/XOR:
3711   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3712   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3713   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3714   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3715   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3716   //
3717   // do not sink logical op inside of a vector extend, since it may combine
3718   // into a vsetcc.
3719   EVT Op0VT = N0.getOperand(0).getValueType();
3720   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3721        N0.getOpcode() == ISD::SIGN_EXTEND ||
3722        N0.getOpcode() == ISD::BSWAP ||
3723        // Avoid infinite looping with PromoteIntBinOp.
3724        (N0.getOpcode() == ISD::ANY_EXTEND &&
3725         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3726        (N0.getOpcode() == ISD::TRUNCATE &&
3727         (!TLI.isZExtFree(VT, Op0VT) ||
3728          !TLI.isTruncateFree(Op0VT, VT)) &&
3729         TLI.isTypeLegal(Op0VT))) &&
3730       !VT.isVector() &&
3731       Op0VT == N1.getOperand(0).getValueType() &&
3732       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3733     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3734                                  N0.getOperand(0).getValueType(),
3735                                  N0.getOperand(0), N1.getOperand(0));
3736     AddToWorklist(ORNode.getNode());
3737     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3738   }
3739 
3740   // For each of OP in SHL/SRL/SRA/AND...
3741   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3742   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3743   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3744   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3745        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3746       N0.getOperand(1) == N1.getOperand(1)) {
3747     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3748                                  N0.getOperand(0).getValueType(),
3749                                  N0.getOperand(0), N1.getOperand(0));
3750     AddToWorklist(ORNode.getNode());
3751     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3752                        ORNode, N0.getOperand(1));
3753   }
3754 
3755   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3756   // Only perform this optimization up until type legalization, before
3757   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3758   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3759   // we don't want to undo this promotion.
3760   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3761   // on scalars.
3762   if ((N0.getOpcode() == ISD::BITCAST ||
3763        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3764        Level <= AfterLegalizeTypes) {
3765     SDValue In0 = N0.getOperand(0);
3766     SDValue In1 = N1.getOperand(0);
3767     EVT In0Ty = In0.getValueType();
3768     EVT In1Ty = In1.getValueType();
3769     SDLoc DL(N);
3770     // If both incoming values are integers, and the original types are the
3771     // same.
3772     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3773       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3774       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3775       AddToWorklist(Op.getNode());
3776       return BC;
3777     }
3778   }
3779 
3780   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3781   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3782   // If both shuffles use the same mask, and both shuffle within a single
3783   // vector, then it is worthwhile to move the swizzle after the operation.
3784   // The type-legalizer generates this pattern when loading illegal
3785   // vector types from memory. In many cases this allows additional shuffle
3786   // optimizations.
3787   // There are other cases where moving the shuffle after the xor/and/or
3788   // is profitable even if shuffles don't perform a swizzle.
3789   // If both shuffles use the same mask, and both shuffles have the same first
3790   // or second operand, then it might still be profitable to move the shuffle
3791   // after the xor/and/or operation.
3792   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3793     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3794     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3795 
3796     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3797            "Inputs to shuffles are not the same type");
3798 
3799     // Check that both shuffles use the same mask. The masks are known to be of
3800     // the same length because the result vector type is the same.
3801     // Check also that shuffles have only one use to avoid introducing extra
3802     // instructions.
3803     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3804         SVN0->getMask().equals(SVN1->getMask())) {
3805       SDValue ShOp = N0->getOperand(1);
3806 
3807       // Don't try to fold this node if it requires introducing a
3808       // build vector of all zeros that might be illegal at this stage.
3809       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3810         ShOp = tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations);
3811       }
3812 
3813       // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
3814       // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
3815       // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
3816       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3817         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3818                                       N0->getOperand(0), N1->getOperand(0));
3819         AddToWorklist(NewNode.getNode());
3820         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3821                                     SVN0->getMask());
3822       }
3823 
3824       // Don't try to fold this node if it requires introducing a
3825       // build vector of all zeros that might be illegal at this stage.
3826       ShOp = N0->getOperand(0);
3827       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3828         ShOp = tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations);
3829       }
3830 
3831       // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
3832       // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
3833       // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
3834       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3835         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3836                                       N0->getOperand(1), N1->getOperand(1));
3837         AddToWorklist(NewNode.getNode());
3838         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3839                                     SVN0->getMask());
3840       }
3841     }
3842   }
3843 
3844   return SDValue();
3845 }
3846 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Both operands must be setcc-equivalent; pull out their compare operands
  // and condition codes.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Two integer compares against the same RHS (0 or -1) with the same
  // predicate can be merged into one compare of a combined LHS. The eight
  // cases below are split into an OR-combining group and an AND-combining
  // group.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // NOTE(review): the >1-bit guard presumably keeps the add/compare trick
  // away from degenerate i1 types — confirm against the original commit.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3966 
3967 /// This contains all DAGCombine rules which reduce two values combined by
3968 /// an And operation to a single value. This makes them reusable in the context
3969 /// of visitSELECT(). Rules involving constants are not included as
3970 /// visitSELECT() already handles those cases.
3971 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3972   EVT VT = N1.getValueType();
3973   SDLoc DL(N);
3974 
3975   // fold (and x, undef) -> 0
3976   if (N0.isUndef() || N1.isUndef())
3977     return DAG.getConstant(0, DL, VT);
3978 
3979   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3980     return V;
3981 
3982   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3983       VT.getSizeInBits() <= 64) {
3984     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3985       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3986         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3987         // immediate for an add, but it is legal if its top c2 bits are set,
3988         // transform the ADD so the immediate doesn't need to be materialized
3989         // in a register.
3990         APInt ADDC = ADDI->getAPIntValue();
3991         APInt SRLC = SRLI->getAPIntValue();
3992         if (ADDC.getMinSignedBits() <= 64 &&
3993             SRLC.ult(VT.getSizeInBits()) &&
3994             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3995           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3996                                              SRLC.getZExtValue());
3997           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3998             ADDC |= Mask;
3999             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4000               SDLoc DL0(N0);
4001               SDValue NewAdd =
4002                 DAG.getNode(ISD::ADD, DL0, VT,
4003                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4004               CombineTo(N0.getNode(), NewAdd);
4005               // Return N so it doesn't get rechecked!
4006               return SDValue(N, 0);
4007             }
4008           }
4009         }
4010       }
4011     }
4012   }
4013 
4014   // Reduce bit extract of low half of an integer to the narrower type.
4015   // (and (srl i64:x, K), KMask) ->
4016   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4017   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4018     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4019       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4020         unsigned Size = VT.getSizeInBits();
4021         const APInt &AndMask = CAnd->getAPIntValue();
4022         unsigned ShiftBits = CShift->getZExtValue();
4023 
4024         // Bail out, this node will probably disappear anyway.
4025         if (ShiftBits == 0)
4026           return SDValue();
4027 
4028         unsigned MaskBits = AndMask.countTrailingOnes();
4029         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4030 
4031         if (AndMask.isMask() &&
4032             // Required bits must not span the two halves of the integer and
4033             // must fit in the half size type.
4034             (ShiftBits + MaskBits <= Size / 2) &&
4035             TLI.isNarrowingProfitable(VT, HalfVT) &&
4036             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4037             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4038             TLI.isTruncateFree(VT, HalfVT) &&
4039             TLI.isZExtFree(HalfVT, VT)) {
4040           // The isNarrowingProfitable is to avoid regressions on PPC and
4041           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4042           // on downstream users of this. Those patterns could probably be
4043           // extended to handle extensions mixed in.
4044 
4045           SDValue SL(N0);
4046           assert(MaskBits <= Size);
4047 
4048           // Extracting the highest bit of the low half.
4049           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4050           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4051                                       N0.getOperand(0));
4052 
4053           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4054           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4055           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4056           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4057           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4058         }
4059       }
4060     }
4061   }
4062 
4063   return SDValue();
4064 }
4065 
4066 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4067                                    EVT LoadResultTy, EVT &ExtVT) {
4068   if (!AndC->getAPIntValue().isMask())
4069     return false;
4070 
4071   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4072 
4073   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4074   EVT LoadedVT = LoadN->getMemoryVT();
4075 
4076   if (ExtVT == LoadedVT &&
4077       (!LegalOperations ||
4078        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4079     // ZEXTLOAD will match without needing to change the size of the value being
4080     // loaded.
4081     return true;
4082   }
4083 
4084   // Do not change the width of a volatile load.
4085   if (LoadN->isVolatile())
4086     return false;
4087 
4088   // Do not generate loads of non-round integer types since these can
4089   // be expensive (and would be wrong if the type is not byte sized).
4090   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4091     return false;
4092 
4093   if (LegalOperations &&
4094       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4095     return false;
4096 
4097   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4098     return false;
4099 
4100   return true;
4101 }
4102 
4103 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4104                                     ISD::LoadExtType ExtType, EVT &MemVT,
4105                                     unsigned ShAmt) {
4106   if (!LDST)
4107     return false;
4108   // Only allow byte offsets.
4109   if (ShAmt % 8)
4110     return false;
4111 
4112   // Do not generate loads of non-round integer types since these can
4113   // be expensive (and would be wrong if the type is not byte sized).
4114   if (!MemVT.isRound())
4115     return false;
4116 
4117   // Don't change the width of a volatile load.
4118   if (LDST->isVolatile())
4119     return false;
4120 
4121   // Verify that we are actually reducing a load width here.
4122   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4123     return false;
4124 
4125   // Ensure that this isn't going to produce an unsupported unaligned access.
4126   if (ShAmt &&
4127       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4128                               LDST->getAddressSpace(), ShAmt / 8))
4129     return false;
4130 
4131   // It's not possible to generate a constant of extended or untyped type.
4132   EVT PtrType = LDST->getBasePtr().getValueType();
4133   if (PtrType == MVT::Untyped || PtrType.isExtended())
4134     return false;
4135 
4136   if (isa<LoadSDNode>(LDST)) {
4137     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4138     // Don't transform one with multiple uses, this would require adding a new
4139     // load.
4140     if (!SDValue(Load, 0).hasOneUse())
4141       return false;
4142 
4143     if (LegalOperations &&
4144         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4145       return false;
4146 
4147     // For the transform to be legal, the load must produce only two values
4148     // (the value loaded and the chain).  Don't transform a pre-increment
4149     // load, for example, which produces an extra value.  Otherwise the
4150     // transformation is not equivalent, and the downstream logic to replace
4151     // uses gets things wrong.
4152     if (Load->getNumValues() > 2)
4153       return false;
4154 
4155     // If the load that we're shrinking is an extload and we're not just
4156     // discarding the extension we can't simply shrink the load. Bail.
4157     // TODO: It would be possible to merge the extensions in some cases.
4158     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4159         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4160       return false;
4161 
4162     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4163       return false;
4164   } else {
4165     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4166     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4167     // Can't write outside the original store
4168     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4169       return false;
4170 
4171     if (LegalOperations &&
4172         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4173       return false;
4174   }
4175   return true;
4176 }
4177 
// Walk the operand tree below N (bitwise logic over loads and extensions),
// collecting: loads that can be narrowed under Mask (Loads), logic nodes
// whose constant operands spill outside the mask (NodesWithConsts), and at
// most one arbitrary node that must itself be AND-masked (NodeToMask).
// Returns false as soon as any operand makes the transform unsafe.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    // This walk only handles scalar integer values.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // An OR/XOR constant with bits outside the mask would set bits the
      // mask is supposed to clear; remember the parent so the constant can
      // be re-masked later.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Any other operand must be used only inside this expression tree, or
    // masking it would change its other users.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      // These nodes already guarantee zeros above some width; compare that
      // width against the mask's width.
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise deliberately fall out of the switch to the NodeToMask
      // handling below.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse through further bitwise logic.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
4268 
// Given an AND node N with a constant low-bits mask, try to push the mask
// back through the feeding logic tree so that the loads at the leaves can be
// narrowed (via ReduceLoadWidth). Rewrites the DAG in place and returns true
// on success.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only contiguous low-bit masks (0...01...1) can become zextloads.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load there is nothing to gain.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewrote the operand inside the new AND itself;
      // restore it so the AND still masks the original node. getNode may
      // have folded to a non-AND node, in which case there is nothing to
      // restore.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant operand ends up in Op1.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      // Re-mask the constant so it cannot set bits the outer mask clears.
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above, re-point the AND at the load after the RAUW (unless
      // getNode folded it to something that is not an AND).
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      // The (and (load)) we just built is narrowable by construction.
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The tree now produces already-masked values, so the original AND is
    // redundant; replace it with its first operand.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
4336 
4337 // Unfold
4338 //    x &  (-1 'logical shift' y)
4339 // To
4340 //    (x 'opposite logical shift' y) 'logical shift' y
4341 // if it is better for performance.
4342 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4343   assert(N->getOpcode() == ISD::AND);
4344 
4345   SDValue N0 = N->getOperand(0);
4346   SDValue N1 = N->getOperand(1);
4347 
4348   // Do we actually prefer shifts over mask?
4349   if (!TLI.preferShiftsToClearExtremeBits(N0))
4350     return SDValue();
4351 
4352   // Try to match  (-1 '[outer] logical shift' y)
4353   unsigned OuterShift;
4354   unsigned InnerShift; // The opposite direction to the OuterShift.
4355   SDValue Y;           // Shift amount.
4356   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4357     if (!M.hasOneUse())
4358       return false;
4359     OuterShift = M->getOpcode();
4360     if (OuterShift == ISD::SHL)
4361       InnerShift = ISD::SRL;
4362     else if (OuterShift == ISD::SRL)
4363       InnerShift = ISD::SHL;
4364     else
4365       return false;
4366     if (!isAllOnesConstant(M->getOperand(0)))
4367       return false;
4368     Y = M->getOperand(1);
4369     return true;
4370   };
4371 
4372   SDValue X;
4373   if (matchMask(N1))
4374     X = N0;
4375   else if (matchMask(N0))
4376     X = N1;
4377   else
4378     return SDValue();
4379 
4380   SDLoc DL(N);
4381   EVT VT = N->getValueType(0);
4382 
4383   //     tmp = x   'opposite logical shift' y
4384   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4385   //     ret = tmp 'logical shift' y
4386   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4387 
4388   return T1;
4389 }
4390 
4391 SDValue DAGCombiner::visitAND(SDNode *N) {
4392   SDValue N0 = N->getOperand(0);
4393   SDValue N1 = N->getOperand(1);
4394   EVT VT = N1.getValueType();
4395 
4396   // x & x --> x
4397   if (N0 == N1)
4398     return N0;
4399 
4400   // fold vector ops
4401   if (VT.isVector()) {
4402     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4403       return FoldedVOp;
4404 
4405     // fold (and x, 0) -> 0, vector edition
4406     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4407       // do not return N0, because undef node may exist in N0
4408       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4409                              SDLoc(N), N0.getValueType());
4410     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4411       // do not return N1, because undef node may exist in N1
4412       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4413                              SDLoc(N), N1.getValueType());
4414 
4415     // fold (and x, -1) -> x, vector edition
4416     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4417       return N1;
4418     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4419       return N0;
4420   }
4421 
4422   // fold (and c1, c2) -> c1&c2
4423   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4424   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4425   if (N0C && N1C && !N1C->isOpaque())
4426     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4427   // canonicalize constant to RHS
4428   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4429       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4430     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4431   // fold (and x, -1) -> x
4432   if (isAllOnesConstant(N1))
4433     return N0;
4434   // if (and x, c) is known to be zero, return 0
4435   unsigned BitWidth = VT.getScalarSizeInBits();
4436   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4437                                    APInt::getAllOnesValue(BitWidth)))
4438     return DAG.getConstant(0, SDLoc(N), VT);
4439 
4440   if (SDValue NewSel = foldBinOpIntoSelect(N))
4441     return NewSel;
4442 
4443   // reassociate and
4444   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4445     return RAND;
4446 
4447   // Try to convert a constant mask AND into a shuffle clear mask.
4448   if (VT.isVector())
4449     if (SDValue Shuffle = XformToShuffleWithZero(N))
4450       return Shuffle;
4451 
4452   // fold (and (or x, C), D) -> D if (C & D) == D
4453   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4454     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4455   };
4456   if (N0.getOpcode() == ISD::OR &&
4457       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4458     return N1;
4459   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4460   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4461     SDValue N0Op0 = N0.getOperand(0);
4462     APInt Mask = ~N1C->getAPIntValue();
4463     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4464     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4465       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4466                                  N0.getValueType(), N0Op0);
4467 
4468       // Replace uses of the AND with uses of the Zero extend node.
4469       CombineTo(N, Zext);
4470 
4471       // We actually want to replace all uses of the any_extend with the
4472       // zero_extend, to avoid duplicating things.  This will later cause this
4473       // AND to be folded.
4474       CombineTo(N0.getNode(), Zext);
4475       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4476     }
4477   }
4478   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4479   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4480   // already be zero by virtue of the width of the base type of the load.
4481   //
4482   // the 'X' node here can either be nothing or an extract_vector_elt to catch
4483   // more cases.
4484   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4485        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4486        N0.getOperand(0).getOpcode() == ISD::LOAD &&
4487        N0.getOperand(0).getResNo() == 0) ||
4488       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4489     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4490                                          N0 : N0.getOperand(0) );
4491 
4492     // Get the constant (if applicable) the zero'th operand is being ANDed with.
4493     // This can be a pure constant or a vector splat, in which case we treat the
4494     // vector as a scalar and use the splat value.
4495     APInt Constant = APInt::getNullValue(1);
4496     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4497       Constant = C->getAPIntValue();
4498     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4499       APInt SplatValue, SplatUndef;
4500       unsigned SplatBitSize;
4501       bool HasAnyUndefs;
4502       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4503                                              SplatBitSize, HasAnyUndefs);
4504       if (IsSplat) {
4505         // Undef bits can contribute to a possible optimisation if set, so
4506         // set them.
4507         SplatValue |= SplatUndef;
4508 
4509         // The splat value may be something like "0x00FFFFFF", which means 0 for
4510         // the first vector value and FF for the rest, repeating. We need a mask
4511         // that will apply equally to all members of the vector, so AND all the
4512         // lanes of the constant together.
4513         EVT VT = Vector->getValueType(0);
4514         unsigned BitWidth = VT.getScalarSizeInBits();
4515 
4516         // If the splat value has been compressed to a bitlength lower
4517         // than the size of the vector lane, we need to re-expand it to
4518         // the lane size.
4519         if (BitWidth > SplatBitSize)
4520           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4521                SplatBitSize < BitWidth;
4522                SplatBitSize = SplatBitSize * 2)
4523             SplatValue |= SplatValue.shl(SplatBitSize);
4524 
4525         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4526         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4527         if (SplatBitSize % BitWidth == 0) {
4528           Constant = APInt::getAllOnesValue(BitWidth);
4529           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4530             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4531         }
4532       }
4533     }
4534 
4535     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4536     // actually legal and isn't going to get expanded, else this is a false
4537     // optimisation.
4538     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4539                                                     Load->getValueType(0),
4540                                                     Load->getMemoryVT());
4541 
4542     // Resize the constant to the same size as the original memory access before
4543     // extension. If it is still the AllOnesValue then this AND is completely
4544     // unneeded.
4545     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4546 
4547     bool B;
4548     switch (Load->getExtensionType()) {
4549     default: B = false; break;
4550     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4551     case ISD::ZEXTLOAD:
4552     case ISD::NON_EXTLOAD: B = true; break;
4553     }
4554 
4555     if (B && Constant.isAllOnesValue()) {
4556       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4557       // preserve semantics once we get rid of the AND.
4558       SDValue NewLoad(Load, 0);
4559 
4560       // Fold the AND away. NewLoad may get replaced immediately.
4561       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4562 
4563       if (Load->getExtensionType() == ISD::EXTLOAD) {
4564         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4565                               Load->getValueType(0), SDLoc(Load),
4566                               Load->getChain(), Load->getBasePtr(),
4567                               Load->getOffset(), Load->getMemoryVT(),
4568                               Load->getMemOperand());
4569         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4570         if (Load->getNumValues() == 3) {
4571           // PRE/POST_INC loads have 3 values.
4572           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4573                            NewLoad.getValue(2) };
4574           CombineTo(Load, To, 3, true);
4575         } else {
4576           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4577         }
4578       }
4579 
4580       return SDValue(N, 0); // Return N so it doesn't get rechecked!
4581     }
4582   }
4583 
4584   // fold (and (load x), 255) -> (zextload x, i8)
4585   // fold (and (extload x, i16), 255) -> (zextload x, i8)
4586   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4587   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4588                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
4589                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4590     if (SDValue Res = ReduceLoadWidth(N)) {
4591       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4592         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4593 
4594       AddToWorklist(N);
4595       CombineTo(LN0, Res, Res.getValue(1));
4596       return SDValue(N, 0);
4597     }
4598   }
4599 
4600   if (Level >= AfterLegalizeTypes) {
4601     // Attempt to propagate the AND back up to the leaves which, if they're
4602     // loads, can be combined to narrow loads and the AND node can be removed.
4603     // Perform after legalization so that extend nodes will already be
4604     // combined into the loads.
4605     if (BackwardsPropagateMask(N, DAG)) {
4606       return SDValue(N, 0);
4607     }
4608   }
4609 
4610   if (SDValue Combined = visitANDLike(N0, N1, N))
4611     return Combined;
4612 
4613   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
4614   if (N0.getOpcode() == N1.getOpcode())
4615     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4616       return Tmp;
4617 
4618   // Masking the negated extension of a boolean is just the zero-extended
4619   // boolean:
4620   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4621   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4622   //
4623   // Note: the SimplifyDemandedBits fold below can make an information-losing
4624   // transform, and then we have no way to find this better fold.
4625   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4626     if (isNullOrNullSplat(N0.getOperand(0))) {
4627       SDValue SubRHS = N0.getOperand(1);
4628       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4629           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4630         return SubRHS;
4631       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4632           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4633         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4634     }
4635   }
4636 
4637   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4638   // fold (and (sra)) -> (and (srl)) when possible.
4639   if (SimplifyDemandedBits(SDValue(N, 0)))
4640     return SDValue(N, 0);
4641 
4642   // fold (zext_inreg (extload x)) -> (zextload x)
4643   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4644     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4645     EVT MemVT = LN0->getMemoryVT();
4646     // If we zero all the possible extended bits, then we can turn this into
4647     // a zextload if we are running before legalize or the operation is legal.
4648     unsigned BitWidth = N1.getScalarValueSizeInBits();
4649     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4650                            BitWidth - MemVT.getScalarSizeInBits())) &&
4651         ((!LegalOperations && !LN0->isVolatile()) ||
4652          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4653       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4654                                        LN0->getChain(), LN0->getBasePtr(),
4655                                        MemVT, LN0->getMemOperand());
4656       AddToWorklist(N);
4657       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4658       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4659     }
4660   }
4661   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4662   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4663       N0.hasOneUse()) {
4664     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4665     EVT MemVT = LN0->getMemoryVT();
4666     // If we zero all the possible extended bits, then we can turn this into
4667     // a zextload if we are running before legalize or the operation is legal.
4668     unsigned BitWidth = N1.getScalarValueSizeInBits();
4669     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4670                            BitWidth - MemVT.getScalarSizeInBits())) &&
4671         ((!LegalOperations && !LN0->isVolatile()) ||
4672          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4673       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4674                                        LN0->getChain(), LN0->getBasePtr(),
4675                                        MemVT, LN0->getMemOperand());
4676       AddToWorklist(N);
4677       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4678       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4679     }
4680   }
4681   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4682   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4683     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4684                                            N0.getOperand(1), false))
4685       return BSwap;
4686   }
4687 
4688   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4689     return Shifts;
4690 
4691   return SDValue();
4692 }
4693 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// \p N0 and \p N1 are the two operands of the OR node \p N; either one may
/// additionally be wrapped in an AND with a constant mask (0xFF, 0xFF00 or
/// 0xFFFF), which this routine looks through. If \p DemandHighBits is true,
/// all result bits above the low 16 must be proven zero (or masked away) for
/// the replacement to be valid.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer types, and only when BSWAP is selectable for them.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // Canonicalize so that an AND wrapping an SRL ends up in N1 and an AND
  // wrapping an SHL ends up in N0.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  // Look through a 0xFF00 (or 0xFFFF) mask on the shl side.
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  // Look through a 0xFF mask on the srl side.
  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After stripping the masks we must have (shl ..., 8) in N0 and
  // (srl ..., 8) in N1, each with a single use.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be the constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8), i.e. the
  // masks sitting underneath the shifts rather than above them.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must be shifting/masking the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Build (bswap a); for types wider than 16 bits the swapped halfword lands
  // in the high bits, so shift it back down into position.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4806 
4807 /// Return true if the specified node is an element that makes up a 32-bit
4808 /// packed halfword byteswap.
4809 /// ((x & 0x000000ff) << 8) |
4810 /// ((x & 0x0000ff00) >> 8) |
4811 /// ((x & 0x00ff0000) << 8) |
4812 /// ((x & 0xff000000) >> 8)
4813 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4814   if (!N.getNode()->hasOneUse())
4815     return false;
4816 
4817   unsigned Opc = N.getOpcode();
4818   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4819     return false;
4820 
4821   SDValue N0 = N.getOperand(0);
4822   unsigned Opc0 = N0.getOpcode();
4823   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4824     return false;
4825 
4826   ConstantSDNode *N1C = nullptr;
4827   // SHL or SRL: look upstream for AND mask operand
4828   if (Opc == ISD::AND)
4829     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4830   else if (Opc0 == ISD::AND)
4831     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4832   if (!N1C)
4833     return false;
4834 
4835   unsigned MaskByteOffset;
4836   switch (N1C->getZExtValue()) {
4837   default:
4838     return false;
4839   case 0xFF:       MaskByteOffset = 0; break;
4840   case 0xFF00:     MaskByteOffset = 1; break;
4841   case 0xFFFF:
4842     // In case demanded bits didn't clear the bits that will be shifted out.
4843     // This is needed for X86.
4844     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4845       MaskByteOffset = 1;
4846       break;
4847     }
4848     return false;
4849   case 0xFF0000:   MaskByteOffset = 2; break;
4850   case 0xFF000000: MaskByteOffset = 3; break;
4851   }
4852 
4853   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4854   if (Opc == ISD::AND) {
4855     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4856       // (x >> 8) & 0xff
4857       // (x >> 8) & 0xff0000
4858       if (Opc0 != ISD::SRL)
4859         return false;
4860       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4861       if (!C || C->getZExtValue() != 8)
4862         return false;
4863     } else {
4864       // (x << 8) & 0xff00
4865       // (x << 8) & 0xff000000
4866       if (Opc0 != ISD::SHL)
4867         return false;
4868       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4869       if (!C || C->getZExtValue() != 8)
4870         return false;
4871     }
4872   } else if (Opc == ISD::SHL) {
4873     // (x & 0xff) << 8
4874     // (x & 0xff0000) << 8
4875     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4876       return false;
4877     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4878     if (!C || C->getZExtValue() != 8)
4879       return false;
4880   } else { // Opc == ISD::SRL
4881     // (x & 0xff00) >> 8
4882     // (x & 0xff000000) >> 8
4883     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4884       return false;
4885     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4886     if (!C || C->getZExtValue() != 8)
4887       return false;
4888   }
4889 
4890   if (Parts[MaskByteOffset])
4891     return false;
4892 
4893   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4894   return true;
4895 }
4896 
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
///
/// \p N0 and \p N1 are the operands of the OR node \p N; the four masked
/// shifts may be grouped as a balanced or a left-leaning tree of ORs.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  // Only i32, and only when BSWAP is selectable for it.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] records the node supplying byte i of x; isBSwapHWordElement
  // fills one slot per matched element and all four must agree at the end.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
4971 
4972 /// This contains all DAGCombine rules which reduce two values combined by
4973 /// an Or operation to a single value \see visitANDLike().
4974 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4975   EVT VT = N1.getValueType();
4976   SDLoc DL(N);
4977 
4978   // fold (or x, undef) -> -1
4979   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4980     return DAG.getAllOnesConstant(DL, VT);
4981 
4982   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4983     return V;
4984 
4985   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4986   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4987       // Don't increase # computations.
4988       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4989     // We can only do this xform if we know that bits from X that are set in C2
4990     // but not in C1 are already zero.  Likewise for Y.
4991     if (const ConstantSDNode *N0O1C =
4992         getAsNonOpaqueConstant(N0.getOperand(1))) {
4993       if (const ConstantSDNode *N1O1C =
4994           getAsNonOpaqueConstant(N1.getOperand(1))) {
4995         // We can only do this xform if we know that bits from X that are set in
4996         // C2 but not in C1 are already zero.  Likewise for Y.
4997         const APInt &LHSMask = N0O1C->getAPIntValue();
4998         const APInt &RHSMask = N1O1C->getAPIntValue();
4999 
5000         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5001             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5002           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5003                                   N0.getOperand(0), N1.getOperand(0));
5004           return DAG.getNode(ISD::AND, DL, VT, X,
5005                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5006         }
5007       }
5008     }
5009   }
5010 
5011   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5012   if (N0.getOpcode() == ISD::AND &&
5013       N1.getOpcode() == ISD::AND &&
5014       N0.getOperand(0) == N1.getOperand(0) &&
5015       // Don't increase # computations.
5016       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5017     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5018                             N0.getOperand(1), N1.getOperand(1));
5019     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5020   }
5021 
5022   return SDValue();
5023 }
5024 
/// Combine an OR node: constant folds, vector/shuffle folds, bswap and
/// rotate matching, reassociation and demanded-bits simplification.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build the combined mask element by element; bail out if any lane
        // cannot be expressed as a single two-input shuffle.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the other side is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            // Try the commuted shuffle if the straight mask isn't legal.
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Try to turn a tree of ORed loads into a single wider load.
  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
5190 
5191 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5192   if (Op.getOpcode() == ISD::AND &&
5193       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5194     Mask = Op.getOperand(1);
5195     return Op.getOperand(0);
5196   }
5197   return Op;
5198 }
5199 
5200 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5201 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5202                             SDValue &Mask) {
5203   Op = stripConstantMask(DAG, Op, Mask);
5204   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5205     Shift = Op;
5206     return true;
5207   }
5208   return false;
5209 }
5210 
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  // Peel off an optional constant AND mask; Mask is reported to the caller.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE; // Sentinel: no shift opcode chosen yet.
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  // The needed shift is opposite to the existing half: an SRL half pairs with
  // an SHL (or MUL) on the other side, and an SHL half with SRL (or UDIV).
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values.
  // All three constants must be present and non-zero.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  // c3 = bitwidth - c2, so that c3 + c2 == bitwidth as required.
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
5324 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Neg.getOperand(0), Known);
      unsigned Bits = Log2_64(EltSize);
      // The AND can be treated as a truncation to the low Bits bits if the
      // mask constant covers only those bits and every low bit it would clear
      // is already known to be zero in the operand.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Pos.getOperand(0), Known);
      // Same truncation argument as for Neg above.
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
5438 
5439 // A subroutine of MatchRotate used once we have found an OR of two opposite
5440 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5441 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5442 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5443 // Neg with outer conversions stripped away.
5444 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5445                                        SDValue Neg, SDValue InnerPos,
5446                                        SDValue InnerNeg, unsigned PosOpcode,
5447                                        unsigned NegOpcode, const SDLoc &DL) {
5448   // fold (or (shl x, (*ext y)),
5449   //          (srl x, (*ext (sub 32, y)))) ->
5450   //   (rotl x, y) or (rotr x, (sub 32, y))
5451   //
5452   // fold (or (shl x, (*ext (sub 32, y))),
5453   //          (srl x, (*ext y))) ->
5454   //   (rotr x, y) or (rotl x, (sub 32, y))
5455   EVT VT = Shifted.getValueType();
5456   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5457     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5458     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5459                        HasPos ? Pos : Neg).getNode();
5460   }
5461 
5462   return nullptr;
5463 }
5464 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
// \returns the rotate node (possibly wrapped in AND/TRUNCATE), or nullptr if
// no rotate idiom was matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate.
  // Try to match a rotate on the wider pre-truncate operands and truncate the
  // result.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return nullptr;

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return nullptr;

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // Constant shift amounts that sum to the element size form a rotate.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    // The combined mask keeps a rotated-in bit only if neither original AND
    // would have cleared it.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try to prove the variable amounts are complementary in either direction.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
5609 
5610 namespace {
5611 
5612 /// Represents known origin of an individual byte in load combine pattern. The
5613 /// value of the byte is either constant zero or comes from memory.
5614 struct ByteProvider {
5615   // For constant zero providers Load is set to nullptr. For memory providers
5616   // Load represents the node which loads the byte from memory.
5617   // ByteOffset is the offset of the byte in the value produced by the load.
5618   LoadSDNode *Load = nullptr;
5619   unsigned ByteOffset = 0;
5620 
5621   ByteProvider() = default;
5622 
5623   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5624     return ByteProvider(Load, ByteOffset);
5625   }
5626 
5627   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5628 
5629   bool isConstantZero() const { return !Load; }
5630   bool isMemory() const { return Load; }
5631 
5632   bool operator==(const ByteProvider &Other) const {
5633     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5634   }
5635 
5636 private:
5637   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5638       : Load(Load), ByteOffset(ByteOffset) {}
5639 };
5640 
5641 } // end anonymous namespace
5642 
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  // Only the root may have multiple uses (see the function comment).
  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR has a known provider only if one side supplies the byte
    // and the other side supplies constant zero for it.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    // Only handle shifts by a constant multiple of 8: the low ByteShift bytes
    // become zero, the rest come from the source shifted down by ByteShift.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes above the narrow value are zero only for zero-extension; for
    // sign/any extension their content isn't a known constant.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // BSWAP mirrors the byte order, so request the mirrored index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond what memory provides are zero only for zext loads.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}
5740 
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    return SDValue();

  // Map a logical byte index to its position in memory for each endianness.
  std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
    unsigned BW, unsigned i) { return i; };
  std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
    unsigned BW, unsigned i) { return BW - i - 1; };

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Offset (in bytes, from the load's base address) at which provider P's
  // byte actually sits in memory, accounting for target endianness.
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
  for (unsigned i = 0; i < ByteWidth; i++) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory
      return SDValue();

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < ByteWidth; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
    BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
    if (!BigEndian && !LittleEndian)
      return SDValue();
  }
  assert((BigEndian != LittleEndian) && "should be either or");
  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single load and bswap if needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != BigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, FirstLoad->getAddressSpace(),
                                        FirstLoad->getAlignment(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
5906 
5907 // If the target has andn, bsl, or a similar bit-select instruction,
5908 // we want to unfold masked merge, with canonical pattern of:
5909 //   |        A  |  |B|
5910 //   ((x ^ y) & m) ^ y
5911 //    |  D  |
5912 // Into:
5913 //   (x & m) | (y & ~m)
5914 // If y is a constant, and the 'andn' does not work with immediates,
5915 // we unfold into a different pattern:
5916 //   ~(~x & m) & (m | y)
5917 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
5918 //       the very least that breaks andnpd / andnps patterns, and because those
5919 //       patterns are simplified in IR and shouldn't be created in the DAG
5920 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
5921   assert(N->getOpcode() == ISD::XOR);
5922 
5923   // Don't touch 'not' (i.e. where y = -1).
5924   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
5925     return SDValue();
5926 
5927   EVT VT = N->getValueType(0);
5928 
5929   // There are 3 commutable operators in the pattern,
5930   // so we have to deal with 8 possible variants of the basic pattern.
5931   SDValue X, Y, M;
5932   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
5933     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
5934       return false;
5935     SDValue Xor = And.getOperand(XorIdx);
5936     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
5937       return false;
5938     SDValue Xor0 = Xor.getOperand(0);
5939     SDValue Xor1 = Xor.getOperand(1);
5940     // Don't touch 'not' (i.e. where y = -1).
5941     if (isAllOnesOrAllOnesSplat(Xor1))
5942       return false;
5943     if (Other == Xor0)
5944       std::swap(Xor0, Xor1);
5945     if (Other != Xor1)
5946       return false;
5947     X = Xor0;
5948     Y = Xor1;
5949     M = And.getOperand(XorIdx ? 0 : 1);
5950     return true;
5951   };
5952 
5953   SDValue N0 = N->getOperand(0);
5954   SDValue N1 = N->getOperand(1);
5955   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
5956       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
5957     return SDValue();
5958 
5959   // Don't do anything if the mask is constant. This should not be reachable.
5960   // InstCombine should have already unfolded this pattern, and DAGCombiner
5961   // probably shouldn't produce it, too.
5962   if (isa<ConstantSDNode>(M.getNode()))
5963     return SDValue();
5964 
5965   // We can transform if the target has AndNot
5966   if (!TLI.hasAndNot(M))
5967     return SDValue();
5968 
5969   SDLoc DL(N);
5970 
5971   // If Y is a constant, check that 'andn' works with immediates.
5972   if (!TLI.hasAndNot(Y)) {
5973     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
5974     // If not, we need to do a bit more work to make sure andn is still used.
5975     SDValue NotX = DAG.getNOT(DL, X, VT);
5976     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
5977     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
5978     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
5979     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
5980   }
5981 
5982   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
5983   SDValue NotM = DAG.getNOT(DL, M, VT);
5984   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
5985 
5986   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
5987 }
5988 
/// Combine an (xor ...) node. The folds below are attempted strictly in
/// order, and each one returns immediately on success, so the ordering is
/// part of the canonicalization contract (e.g. constants are moved to the
/// RHS before any RHS-constant patterns are matched).
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // (xor with the constant-true value inverts a setcc/select_cc condition.)
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType().isInteger());
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: swapping AND<->OR under the negation.
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      // De Morgan: swapping AND<->OR under the negation.
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    if (XorC && ShiftC) {
      APInt Ones = APInt::getAllOnesValue(VT.getScalarSizeInBits());
      Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftC->getZExtValue())
                                  : Ones.lshr(ShiftC->getZExtValue());
      if (XorC->getAPIntValue() == Ones) {
        // If the xor constant is a shifted -1, do a 'not' before the shift:
        // xor (X << ShiftC), XorC --> (not X) << ShiftC
        // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
        SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
        return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
      }
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        // Y must be the sign-mask of X, i.e. sra by (bitwidth - 1).
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
6174 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
///
/// Tries to commute the shift with a one-use binop LHS:
///   (shift (binop X, C1), C2)  ->  (binop (shift X, C2), (shift C1, C2))
/// \p N is the shift node (SHL/SRA/SRL); \p Amt is its constant shift amount.
/// Returns the rewritten node, or an empty SDValue if no transform applies.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // Do not turn a 'not' into a regular xor.
  if (isBitwiseNot(N->getOperand(0)))
    return SDValue();

  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: this is restricted to cases where the input to the binop is a
  // shift by a constant, or a copy/select. Enable it in other cases once we
  // figure out when exactly it is profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  // NOTE(review): bailing out when N has exactly ONE use looks inverted
  // (multiple uses would normally be the unprofitable case) — this matches
  // the existing behavior, but confirm the intent before changing it.
  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
6256 
6257 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6258   assert(N->getOpcode() == ISD::TRUNCATE);
6259   assert(N->getOperand(0).getOpcode() == ISD::AND);
6260 
6261   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6262   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6263     SDValue N01 = N->getOperand(0).getOperand(1);
6264     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6265       SDLoc DL(N);
6266       EVT TruncVT = N->getValueType(0);
6267       SDValue N00 = N->getOperand(0).getOperand(0);
6268       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6269       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6270       AddToWorklist(Trunc00.getNode());
6271       AddToWorklist(Trunc01.getNode());
6272       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6273     }
6274   }
6275 
6276   return SDValue();
6277 }
6278 
/// Combine a rotate (ROTL/ROTR) node: drop zero rotates, reduce the rotate
/// amount modulo the bitwidth, distribute a truncate through an 'and' on the
/// amount operand, and merge nested rotates of the same value.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  // Same-direction rotates add their amounts; opposite directions subtract.
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        // Normalize the combined amount into [0, Bitsize) before emitting.
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
6328 
/// Combine a (shl x, y) node. Like visitXOR, the folds are tried in order
/// with early returns, so the sequence doubles as a canonicalization order.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  // The widening by one bit in zeroExtendToMatch keeps c1+c2 from wrapping.
  if (N0.getOpcode() == ISD::SHL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // The 'exact' flag guarantees no bits were shifted out by the right shift.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits the original srl would have cleared.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant actually folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
6554 
/// Combine an (sra x, y) node. Folds are tried in order with early returns.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> (sra c1, c2)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Per-element: compute the clamped sum of the two shift amounts.
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // A logical shift suffices for the residual shift: the high bits
        // where SRL and SRA would differ are discarded by the truncate, and
        // the sign is re-established by the final sign_extend.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
6713 
/// Try to simplify an ISD::SRL (logical shift right) node.
/// Returns the replacement value, SDValue(N, 0) if N was updated in place,
/// or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Fold away trivial shifts (undef operands, shift of 0, shift by 0, ...).
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // N1C is non-null for a constant (or constant-splat vector) shift amount.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // Sum the two shift amounts in a widened type so c1 + c2 can't wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    // The mask is all-ones shifted right by the same amount, i.e. it keeps
    // exactly the bits the shl/srl pair would have preserved.
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the high bits that any_extend left undefined, so the result
      // matches the zero bits the original wide srl would have shifted in.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known;
    DAG.computeKnownBits(N0.getOperand(0), Known);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
6923 
6924 SDValue DAGCombiner::visitABS(SDNode *N) {
6925   SDValue N0 = N->getOperand(0);
6926   EVT VT = N->getValueType(0);
6927 
6928   // fold (abs c1) -> c2
6929   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6930     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6931   // fold (abs (abs x)) -> (abs x)
6932   if (N0.getOpcode() == ISD::ABS)
6933     return N0;
6934   // fold (abs x) -> x iff not-negative
6935   if (DAG.SignBitIsZero(N0))
6936     return N0;
6937   return SDValue();
6938 }
6939 
6940 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6941   SDValue N0 = N->getOperand(0);
6942   EVT VT = N->getValueType(0);
6943 
6944   // fold (bswap c1) -> c2
6945   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6946     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6947   // fold (bswap (bswap x)) -> x
6948   if (N0.getOpcode() == ISD::BSWAP)
6949     return N0->getOperand(0);
6950   return SDValue();
6951 }
6952 
6953 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6954   SDValue N0 = N->getOperand(0);
6955   EVT VT = N->getValueType(0);
6956 
6957   // fold (bitreverse c1) -> c2
6958   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6959     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6960   // fold (bitreverse (bitreverse x)) -> x
6961   if (N0.getOpcode() == ISD::BITREVERSE)
6962     return N0.getOperand(0);
6963   return SDValue();
6964 }
6965 
6966 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6967   SDValue N0 = N->getOperand(0);
6968   EVT VT = N->getValueType(0);
6969 
6970   // fold (ctlz c1) -> c2
6971   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6972     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6973 
6974   // If the value is known never to be zero, switch to the undef version.
6975   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
6976     if (DAG.isKnownNeverZero(N0))
6977       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6978   }
6979 
6980   return SDValue();
6981 }
6982 
6983 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6984   SDValue N0 = N->getOperand(0);
6985   EVT VT = N->getValueType(0);
6986 
6987   // fold (ctlz_zero_undef c1) -> c2
6988   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6989     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6990   return SDValue();
6991 }
6992 
6993 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6994   SDValue N0 = N->getOperand(0);
6995   EVT VT = N->getValueType(0);
6996 
6997   // fold (cttz c1) -> c2
6998   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6999     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7000 
7001   // If the value is known never to be zero, switch to the undef version.
7002   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7003     if (DAG.isKnownNeverZero(N0))
7004       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7005   }
7006 
7007   return SDValue();
7008 }
7009 
7010 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7011   SDValue N0 = N->getOperand(0);
7012   EVT VT = N->getValueType(0);
7013 
7014   // fold (cttz_zero_undef c1) -> c2
7015   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7016     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7017   return SDValue();
7018 }
7019 
7020 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7021   SDValue N0 = N->getOperand(0);
7022   EVT VT = N->getValueType(0);
7023 
7024   // fold (ctpop c1) -> c2
7025   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7026     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7027   return SDValue();
7028 }
7029 
7030 // FIXME: This should be checking for no signed zeros on individual operands, as
7031 // well as no nans.
7032 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
7033   const TargetOptions &Options = DAG.getTarget().Options;
7034   EVT VT = LHS.getValueType();
7035 
7036   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7037          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7038 }
7039 
/// Generate Min/Max node.
/// Try to fold select(setcc(LHS, RHS, CC), True, False) into a single
/// fminnum/fmaxnum (or FMINNUM_IEEE/FMAXNUM_IEEE) node when the select picks
/// between exactly the two compared values. Returns an empty SDValue if no
/// fold applies. Callers must have already established that NaNs cannot
/// occur (see isLegalToCombineMinNumMaxNum).
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // The select must choose between the two compared values themselves, in
  // either order; anything else is not a min/max pattern.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // Since the operands are known never to be NaN here, either fminnum or
    // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
    // expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    // NOTE(review): legality is queried on TransformVT (the post-legalization
    // type) while the node is created with VT — presumably so the fold also
    // fires for types that are legalized by promotion; confirm.
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    // Mirror image of the less-than cases above: a "greater" comparison
    // selecting its LHS is a max, selecting its RHS is a min.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    // Equality and unordered comparisons don't map onto min/max.
    return SDValue();
  }
}
7087 
/// Fold (select Cond, C1, C2) where C1 and C2 are integer constants into
/// boolean math (zext/sext/xor/add of the condition). Returns the replacement
/// value or an empty SDValue if no fold applies.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  // Both select operands must be scalar integer constants.
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        // (sext of an i1 true is -1, so -1 + (C1+1) == C1.)
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
7181 
/// Try to simplify an ISD::SELECT node (select N0, N1, N2). Returns the
/// replacement value, SDValue(N, 0) if N was updated in place, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // Trivial folds: undef condition, identical true/false values, etc.
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // The i1-typed folds below turn the select into plain boolean logic.
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // !use_empty() means getNode() CSE'd to a select that already existed,
      // so the transform is profitable even without normalization.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  if (VT0 == MVT::i1) {
    // select (not Cond), N1, N2 -> select Cond, N2, N1
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      // The compared constant must be the bitwise-not of the added constant
      // for the comparison to be exactly the uaddo overflow check.
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
                         N0.getOperand(2));

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
7350 
7351 static
7352 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7353   SDLoc DL(N);
7354   EVT LoVT, HiVT;
7355   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7356 
7357   // Split the inputs.
7358   SDValue Lo, Hi, LL, LH, RL, RH;
7359   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7360   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7361 
7362   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7363   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7364 
7365   return std::make_pair(Lo, Hi);
7366 }
7367 
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
// If each half of the condition vector selects uniformly from one side, the
// vselect can be replaced by a concat of the chosen concat operands.
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    // Pointer-identity comparison works because SelectionDAG CSE-uniques
    // constant nodes of the same value and type.
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  // A zero selector element picks RHS, nonzero picks LHS; glue the chosen
  // concat operands back together.
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
7422 
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  // Only combine before type legalization; the whole point is to pre-empt
  // the type legalizer (see comment below).
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  // Split the setcc mask directly (rather than the legalizer's generic
  // unroll) into low/high half vectors.
  SDValue MaskLo, MaskHi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  // Both halves scatter through the same base pointer; only the index
  // vector is split.
  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  // NOTE(review): this MMO is sized for the low half but reused for the
  // high-half scatter below as well.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
                                    DataLo.getValueType(), DL, OpsLo, MMO);

  // The order of the Scatter operation after split is well defined. The "Hi"
  // part comes after the "Lo". So these two operations should be chained one
  // after another (note the Hi ops list starts with Lo as its chain).
  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                              DL, OpsHi, MMO);
}
7481 
7482 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7483   if (Level >= AfterLegalizeTypes)
7484     return SDValue();
7485 
7486   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7487   SDValue Mask = MST->getMask();
7488   SDValue Data  = MST->getValue();
7489   EVT VT = Data.getValueType();
7490   SDLoc DL(N);
7491 
7492   // If the MSTORE data type requires splitting and the mask is provided by a
7493   // SETCC, then split both nodes and its operands before legalization. This
7494   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7495   // and enables future optimizations (e.g. min/max pattern matching on X86).
7496   if (Mask.getOpcode() == ISD::SETCC) {
7497     // Check if any splitting is required.
7498     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7499         TargetLowering::TypeSplitVector)
7500       return SDValue();
7501 
7502     SDValue MaskLo, MaskHi, Lo, Hi;
7503     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7504 
7505     SDValue Chain = MST->getChain();
7506     SDValue Ptr   = MST->getBasePtr();
7507 
7508     EVT MemoryVT = MST->getMemoryVT();
7509     unsigned Alignment = MST->getOriginalAlignment();
7510 
7511     // if Alignment is equal to the vector size,
7512     // take the half of it for the second part
7513     unsigned SecondHalfAlignment =
7514       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7515 
7516     EVT LoMemVT, HiMemVT;
7517     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7518 
7519     SDValue DataLo, DataHi;
7520     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7521 
7522     MachineMemOperand *MMO = DAG.getMachineFunction().
7523       getMachineMemOperand(MST->getPointerInfo(),
7524                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7525                            Alignment, MST->getAAInfo(), MST->getRanges());
7526 
7527     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7528                             MST->isTruncatingStore(),
7529                             MST->isCompressingStore());
7530 
7531     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7532                                      MST->isCompressingStore());
7533     unsigned HiOffset = LoMemVT.getStoreSize();
7534 
7535     MMO = DAG.getMachineFunction().getMachineMemOperand(
7536         MST->getPointerInfo().getWithOffset(HiOffset),
7537         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7538         MST->getAAInfo(), MST->getRanges());
7539 
7540     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7541                             MST->isTruncatingStore(),
7542                             MST->isCompressingStore());
7543 
7544     AddToWorklist(Lo.getNode());
7545     AddToWorklist(Hi.getNode());
7546 
7547     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7548   }
7549   return SDValue();
7550 }
7551 
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  // Only combine before type legalization; the whole point is to pre-empt
  // the type legalizer (see comment below).
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).

  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  // Split the setcc mask, the pass-through value, the result type, and the
  // index vector into low/high halves.
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  SDValue PassThru = MGT->getPassThru();
  SDValue PassThruLo, PassThruHi;
  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  // Both halves gather through the same base pointer; only the index
  // vector is split.
  SDValue Scale = MGT->getScale();
  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  // NOTE(review): this MMO is sized for the low half but reused for the
  // high-half gather below as well.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                          Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                           MMO);

  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                           MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  // Reassemble the full-width value result from the two halves and return
  // both the data and the new chain.
  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}
7628 
7629 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7630   if (Level >= AfterLegalizeTypes)
7631     return SDValue();
7632 
7633   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7634   SDValue Mask = MLD->getMask();
7635   SDLoc DL(N);
7636 
7637   // If the MLOAD result requires splitting and the mask is provided by a
7638   // SETCC, then split both nodes and its operands before legalization. This
7639   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7640   // and enables future optimizations (e.g. min/max pattern matching on X86).
7641   if (Mask.getOpcode() == ISD::SETCC) {
7642     EVT VT = N->getValueType(0);
7643 
7644     // Check if any splitting is required.
7645     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7646         TargetLowering::TypeSplitVector)
7647       return SDValue();
7648 
7649     SDValue MaskLo, MaskHi, Lo, Hi;
7650     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7651 
7652     SDValue PassThru = MLD->getPassThru();
7653     SDValue PassThruLo, PassThruHi;
7654     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7655 
7656     EVT LoVT, HiVT;
7657     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7658 
7659     SDValue Chain = MLD->getChain();
7660     SDValue Ptr   = MLD->getBasePtr();
7661     EVT MemoryVT = MLD->getMemoryVT();
7662     unsigned Alignment = MLD->getOriginalAlignment();
7663 
7664     // if Alignment is equal to the vector size,
7665     // take the half of it for the second part
7666     unsigned SecondHalfAlignment =
7667       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7668          Alignment/2 : Alignment;
7669 
7670     EVT LoMemVT, HiMemVT;
7671     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7672 
7673     MachineMemOperand *MMO = DAG.getMachineFunction().
7674     getMachineMemOperand(MLD->getPointerInfo(),
7675                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7676                          Alignment, MLD->getAAInfo(), MLD->getRanges());
7677 
7678     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
7679                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7680 
7681     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7682                                      MLD->isExpandingLoad());
7683     unsigned HiOffset = LoMemVT.getStoreSize();
7684 
7685     MMO = DAG.getMachineFunction().getMachineMemOperand(
7686         MLD->getPointerInfo().getWithOffset(HiOffset),
7687         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7688         MLD->getAAInfo(), MLD->getRanges());
7689 
7690     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
7691                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7692 
7693     AddToWorklist(Lo.getNode());
7694     AddToWorklist(Hi.getNode());
7695 
7696     // Build a factor node to remember that this load is independent of the
7697     // other one.
7698     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7699                         Hi.getValue(1));
7700 
7701     // Legalized the chain result - switch anything that used the old chain to
7702     // use the new one.
7703     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7704 
7705     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7706 
7707     SDValue RetOps[] = { LoadRes, Chain };
7708     return DAG.getMergeValues(RetOps, DL);
7709   }
7710   return SDValue();
7711 }
7712 
7713 /// A vector select of 2 constant vectors can be simplified to math/logic to
7714 /// avoid a variable select instruction and possibly avoid constant loads.
7715 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7716   SDValue Cond = N->getOperand(0);
7717   SDValue N1 = N->getOperand(1);
7718   SDValue N2 = N->getOperand(2);
7719   EVT VT = N->getValueType(0);
7720   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7721       !TLI.convertSelectOfConstantsToMath(VT) ||
7722       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7723       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7724     return SDValue();
7725 
7726   // Check if we can use the condition value to increment/decrement a single
7727   // constant value. This simplifies a select to an add and removes a constant
7728   // load/materialization from the general case.
7729   bool AllAddOne = true;
7730   bool AllSubOne = true;
7731   unsigned Elts = VT.getVectorNumElements();
7732   for (unsigned i = 0; i != Elts; ++i) {
7733     SDValue N1Elt = N1.getOperand(i);
7734     SDValue N2Elt = N2.getOperand(i);
7735     if (N1Elt.isUndef() || N2Elt.isUndef())
7736       continue;
7737 
7738     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7739     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7740     if (C1 != C2 + 1)
7741       AllAddOne = false;
7742     if (C1 != C2 - 1)
7743       AllSubOne = false;
7744   }
7745 
7746   // Further simplifications for the extra-special cases where the constants are
7747   // all 0 or all -1 should be implemented as folds of these patterns.
7748   SDLoc DL(N);
7749   if (AllAddOne || AllSubOne) {
7750     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7751     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7752     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7753     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7754     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7755   }
7756 
7757   // The general case for select-of-constants:
7758   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7759   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7760   // leave that to a machine-specific pass.
7761   return SDValue();
7762 }
7763 
/// Combine ISD::VSELECT nodes (vector select with a vector condition).
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // Generic select simplifications (shared with scalar SELECT).
  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match both orientations: (X >=/> 0/-1) ? X : (0 - X) and the
    // mirrored (X <=/< 0) ? (0 - X) : X form.
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer a native ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand: Y = sra(X, bits-1); abs = xor(add(X, Y), Y).
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // vselect x, y (fcmp lt x, y) -> fminnum x, y
    // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //
    EVT VT = N->getValueType(0);
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
      if (SDValue FMinMax = combineMinNumMaxNum(
            DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullOrNullSplat(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  // Fold based on the true/false operands (e.g. shared loads).
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  // Select of two constant vectors -> math/logic.
  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
7876 
/// Combine SELECT_CC nodes: (select_cc lhs, rhs, tval, fval, cc).
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with DAG creation: no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc using the simplified compare operands.
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
7918 
7919 SDValue DAGCombiner::visitSETCC(SDNode *N) {
7920   // setcc is very commonly used as an argument to brcond. This pattern
7921   // also lend itself to numerous combines and, as a result, it is desired
7922   // we keep the argument to a brcond as a setcc as much as possible.
7923   bool PreferSetCC =
7924       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7925 
7926   SDValue Combined = SimplifySetCC(
7927       N->getValueType(0), N->getOperand(0), N->getOperand(1),
7928       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7929 
7930   if (!Combined)
7931     return SDValue();
7932 
7933   // If we prefer to have a setcc, and we don't, we'll try our best to
7934   // recreate one using rebuildSetCC.
7935   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7936     SDValue NewSetCC = rebuildSetCC(Combined);
7937 
7938     // We don't have anything interesting to combine to.
7939     if (NewSetCC.getNode() == N)
7940       return SDValue();
7941 
7942     if (NewSetCC)
7943       return NewSetCC;
7944   }
7945 
7946   return Combined;
7947 }
7948 
7949 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7950   SDValue LHS = N->getOperand(0);
7951   SDValue RHS = N->getOperand(1);
7952   SDValue Carry = N->getOperand(2);
7953   SDValue Cond = N->getOperand(3);
7954 
7955   // If Carry is false, fold to a regular SETCC.
7956   if (isNullConstant(Carry))
7957     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7958 
7959   return SDValue();
7960 }
7961 
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  // (SelectionDAG constant-folds the extend of a scalar constant.)
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0);

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  // Only when the result is a vector whose (legal, if required) scalar type
  // we can build constants of directly.
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return SDValue();

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();            // destination scalar width
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); // source width
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    // Undef lanes stay undef at the wider type.
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    // Shadows the outer DL so each folded constant carries the debug
    // location of the operand it came from.
    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts);
}
8019 
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable. SETCC users that must be rewritten
// to use the extended value are collected in ExtendNodes.
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
  // Walk every user of the load value (other than the extend N itself).
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    // Ignore uses of other result values of the node (e.g. the chain).
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      // The other setcc operand must be a constant for the rewrite to work.
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    // Check whether the extended value itself is also copied to a register.
    bool BothLiveOut = false;
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      // (Implicit bool conversion: profitable only if we found setcc users.)
      return ExtendNodes.size();
  }
  return true;
}
8083 
8084 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8085                                   SDValue OrigLoad, SDValue ExtLoad,
8086                                   ISD::NodeType ExtType) {
8087   // Extend SetCC uses if necessary.
8088   SDLoc DL(ExtLoad);
8089   for (SDNode *SetCC : SetCCs) {
8090     SmallVector<SDValue, 4> Ops;
8091 
8092     for (unsigned j = 0; j != 2; ++j) {
8093       SDValue SOp = SetCC->getOperand(j);
8094       if (SOp == OrigLoad)
8095         Ops.push_back(ExtLoad);
8096       else
8097         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8098     }
8099 
8100     Ops.push_back(SetCC->getOperand(2));
8101     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8102   }
8103 }
8104 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
/// Try to split a vector (sext/zext (load x)) whose type is illegal into a
/// CONCAT_VECTORS of several smaller, legal extending loads at increasing
/// offsets. Returns SDValue(N, 0) on success (so the caller stops revisiting
/// N) or an empty SDValue if the combine does not apply.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Bail out on extending/indexed/volatile loads, loads with other users,
  // non-vector or non-power-of-two destination types, or when the target
  // does not want vector extloads formed here.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // The extend node N itself is not the only user of the load's value; every
  // other user must be extendable too. SetCC users are collected so they can
  // be rewritten to operate on the wider type afterwards.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Repeatedly halve both source and destination types until the extload is
  // legal/custom for the target, or the source can no longer be split.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Byte distance between consecutive split loads.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // The guaranteed alignment shrinks as the offset from the original
    // pointer grows.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the split loads' chains and concatenate their values back into the
  // full destination vector.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
8201 
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
/// Returns SDValue(N, 0) on success (so the caller stops revisiting N) or an
/// empty SDValue if the pattern does not match or the transform is not legal.
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);

  // and/or/xor
  // The extend's operand must be a logic op with a constant RHS that is legal
  // (once operations have been legalized) in the wider type.
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  // The logic op's LHS must in turn be a shift by a constant amount, likewise
  // legal in the wider type if operations have been legalized.
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  // The shifted value must be a load that can be turned into a zextload:
  // not already sign-extending and not indexed.
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  // The intermediate logic and shift results must have no other users, since
  // they are recreated in the wider type below.
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Any other users of the load must be extendable as well; collect setcc
  // users so they can be rewritten to the wider type afterwards.
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  // Rebuild the shift on top of the zero-extending load.
  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Zero-extend the logic-op constant and rebuild the logic op in the wider
  // type.
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    // The load value fed only this pattern: just rewire the chain users.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    // Other users still need the narrow value: feed them a truncate of the
    // extended load.
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
8272 
8273 /// If we're narrowing or widening the result of a vector select and the final
8274 /// size is the same size as a setcc (compare) feeding the select, then try to
8275 /// apply the cast operation to the select's operands because matching vector
8276 /// sizes for a select condition and other operands should be more efficient.
8277 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8278   unsigned CastOpcode = Cast->getOpcode();
8279   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8280           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8281           CastOpcode == ISD::FP_ROUND) &&
8282          "Unexpected opcode for vector select narrowing/widening");
8283 
8284   // We only do this transform before legal ops because the pattern may be
8285   // obfuscated by target-specific operations after legalization. Do not create
8286   // an illegal select op, however, because that may be difficult to lower.
8287   EVT VT = Cast->getValueType(0);
8288   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8289     return SDValue();
8290 
8291   SDValue VSel = Cast->getOperand(0);
8292   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8293       VSel.getOperand(0).getOpcode() != ISD::SETCC)
8294     return SDValue();
8295 
8296   // Does the setcc have the same vector size as the casted select?
8297   SDValue SetCC = VSel.getOperand(0);
8298   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8299   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8300     return SDValue();
8301 
8302   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8303   SDValue A = VSel.getOperand(1);
8304   SDValue B = VSel.getOperand(2);
8305   SDValue CastA, CastB;
8306   SDLoc DL(Cast);
8307   if (CastOpcode == ISD::FP_ROUND) {
8308     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8309     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8310     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8311   } else {
8312     CastA = DAG.getNode(CastOpcode, DL, VT, A);
8313     CastB = DAG.getNode(CastOpcode, DL, VT, B);
8314   }
8315   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8316 }
8317 
8318 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8319 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8320 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8321                                      const TargetLowering &TLI, EVT VT,
8322                                      bool LegalOperations, SDNode *N,
8323                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
8324   SDNode *N0Node = N0.getNode();
8325   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8326                                                    : ISD::isZEXTLoad(N0Node);
8327   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8328       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8329     return {};
8330 
8331   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8332   EVT MemVT = LN0->getMemoryVT();
8333   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8334       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8335     return {};
8336 
8337   SDValue ExtLoad =
8338       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8339                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8340   Combiner.CombineTo(N, ExtLoad);
8341   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8342   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8343 }
8344 
8345 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8346 // Only generate vector extloads when 1) they're legal, and 2) they are
8347 // deemed desirable by the target.
8348 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8349                                   const TargetLowering &TLI, EVT VT,
8350                                   bool LegalOperations, SDNode *N, SDValue N0,
8351                                   ISD::LoadExtType ExtLoadType,
8352                                   ISD::NodeType ExtOpc) {
8353   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8354       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8355       ((LegalOperations || VT.isVector() ||
8356         cast<LoadSDNode>(N0)->isVolatile()) &&
8357        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8358     return {};
8359 
8360   bool DoXform = true;
8361   SmallVector<SDNode *, 4> SetCCs;
8362   if (!N0.hasOneUse())
8363     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8364   if (VT.isVector())
8365     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8366   if (!DoXform)
8367     return {};
8368 
8369   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8370   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8371                                    LN0->getBasePtr(), N0.getValueType(),
8372                                    LN0->getMemOperand());
8373   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8374   // If the load value is used only by N, replace it via CombineTo N.
8375   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8376   Combiner.CombineTo(N, ExtLoad);
8377   if (NoReplaceTrunc) {
8378     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8379   } else {
8380     SDValue Trunc =
8381         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8382     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8383   }
8384   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8385 }
8386 
8387 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8388                                        bool LegalOperations) {
8389   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8390           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8391 
8392   SDValue SetCC = N->getOperand(0);
8393   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8394       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8395     return SDValue();
8396 
8397   SDValue X = SetCC.getOperand(0);
8398   SDValue Ones = SetCC.getOperand(1);
8399   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8400   EVT VT = N->getValueType(0);
8401   EVT XVT = X.getValueType();
8402   // setge X, C is canonicalized to setgt, so we do not need to match that
8403   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8404   // not require the 'not' op.
8405   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8406     // Invert and smear/shift the sign bit:
8407     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8408     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8409     SDLoc DL(N);
8410     SDValue NotX = DAG.getNOT(DL, X, VT);
8411     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8412     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8413     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8414   }
8415   return SDValue();
8416 }
8417 
/// Visit an ISD::SIGN_EXTEND node and try the known sext combines.
/// Returns the replacement value, SDValue(N, 0) when N was rewritten in
/// place, or an empty SDValue when nothing applied.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Constant-fold extensions of constants/build_vectors of constants.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Adjust x to the destination width first, then sign-extend in place
      // from the truncated width.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    // The load must not already be a zextload (incompatible with sext) and
    // must not be indexed.
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the logic-op constant to the wider type.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        // Capture use-counts before CombineTo mutates the graph.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users of the load get a truncate of the extended load.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // sext/zext (setgt X, -1) --> shift of (not X); see the helper.
  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  // Push the sext through a vselect fed by a setcc when sizes line up.
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8623 
8624 // isTruncateOf - If N is a truncate of some other value, return true, record
8625 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8626 // This function computes KnownBits to avoid a duplicated call to
8627 // computeKnownBits in the caller.
8628 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8629                          KnownBits &Known) {
8630   if (N->getOpcode() == ISD::TRUNCATE) {
8631     Op = N->getOperand(0);
8632     DAG.computeKnownBits(Op, Known);
8633     return true;
8634   }
8635 
8636   if (N.getOpcode() != ISD::SETCC ||
8637       N.getValueType().getScalarType() != MVT::i1 ||
8638       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
8639     return false;
8640 
8641   SDValue Op0 = N->getOperand(0);
8642   SDValue Op1 = N->getOperand(1);
8643   assert(Op0.getValueType() == Op1.getValueType());
8644 
8645   if (isNullOrNullSplat(Op0))
8646     Op = Op1;
8647   else if (isNullOrNullSplat(Op1))
8648     Op = Op0;
8649   else
8650     return false;
8651 
8652   DAG.computeKnownBits(Op, Known);
8653 
8654   return (Known.Zero | 1).isAllOnesValue();
8655 }
8656 
8657 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
8658   SDValue N0 = N->getOperand(0);
8659   EVT VT = N->getValueType(0);
8660 
8661   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8662     return Res;
8663 
8664   // fold (zext (zext x)) -> (zext x)
8665   // fold (zext (aext x)) -> (zext x)
8666   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8667     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
8668                        N0.getOperand(0));
8669 
8670   // fold (zext (truncate x)) -> (zext x) or
8671   //      (zext (truncate x)) -> (truncate x)
8672   // This is valid when the truncated bits of x are already zero.
8673   SDValue Op;
8674   KnownBits Known;
8675   if (isTruncateOf(DAG, N0, Op, Known)) {
8676     APInt TruncatedBits =
8677       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
8678       APInt(Op.getScalarValueSizeInBits(), 0) :
8679       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
8680                         N0.getScalarValueSizeInBits(),
8681                         std::min(Op.getScalarValueSizeInBits(),
8682                                  VT.getScalarSizeInBits()));
8683     if (TruncatedBits.isSubsetOf(Known.Zero))
8684       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8685   }
8686 
8687   // fold (zext (truncate x)) -> (and x, mask)
8688   if (N0.getOpcode() == ISD::TRUNCATE) {
8689     // fold (zext (truncate (load x))) -> (zext (smaller load x))
8690     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
8691     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8692       SDNode *oye = N0.getOperand(0).getNode();
8693       if (NarrowLoad.getNode() != N0.getNode()) {
8694         CombineTo(N0.getNode(), NarrowLoad);
8695         // CombineTo deleted the truncate, if needed, but not what's under it.
8696         AddToWorklist(oye);
8697       }
8698       return SDValue(N, 0); // Return N so it doesn't get rechecked!
8699     }
8700 
8701     EVT SrcVT = N0.getOperand(0).getValueType();
8702     EVT MinVT = N0.getValueType();
8703 
8704     // Try to mask before the extension to avoid having to generate a larger mask,
8705     // possibly over several sub-vectors.
8706     if (SrcVT.bitsLT(VT) && VT.isVector()) {
8707       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
8708                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
8709         SDValue Op = N0.getOperand(0);
8710         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8711         AddToWorklist(Op.getNode());
8712         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8713         // Transfer the debug info; the new node is equivalent to N0.
8714         DAG.transferDbgValues(N0, ZExtOrTrunc);
8715         return ZExtOrTrunc;
8716       }
8717     }
8718 
8719     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
8720       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8721       AddToWorklist(Op.getNode());
8722       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8723       // We may safely transfer the debug info describing the truncate node over
8724       // to the equivalent and operation.
8725       DAG.transferDbgValues(N0, And);
8726       return And;
8727     }
8728   }
8729 
8730   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
8731   // if either of the casts is not free.
8732   if (N0.getOpcode() == ISD::AND &&
8733       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8734       N0.getOperand(1).getOpcode() == ISD::Constant &&
8735       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8736                            N0.getValueType()) ||
8737        !TLI.isZExtFree(N0.getValueType(), VT))) {
8738     SDValue X = N0.getOperand(0).getOperand(0);
8739     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
8740     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8741     Mask = Mask.zext(VT.getSizeInBits());
8742     SDLoc DL(N);
8743     return DAG.getNode(ISD::AND, DL, VT,
8744                        X, DAG.getConstant(Mask, DL, VT));
8745   }
8746 
8747   // Try to simplify (zext (load x)).
8748   if (SDValue foldedExt =
8749           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8750                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
8751     return foldedExt;
8752 
8753   // fold (zext (load x)) to multiple smaller zextloads.
8754   // Only on illegal but splittable vectors.
8755   if (SDValue ExtLoad = CombineExtLoad(N))
8756     return ExtLoad;
8757 
8758   // fold (zext (and/or/xor (load x), cst)) ->
8759   //      (and/or/xor (zextload x), (zext cst))
8760   // Unless (and (load x) cst) will match as a zextload already and has
8761   // additional users.
8762   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8763        N0.getOpcode() == ISD::XOR) &&
8764       isa<LoadSDNode>(N0.getOperand(0)) &&
8765       N0.getOperand(1).getOpcode() == ISD::Constant &&
8766       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8767     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8768     EVT MemVT = LN00->getMemoryVT();
8769     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
8770         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
8771       bool DoXform = true;
8772       SmallVector<SDNode*, 4> SetCCs;
8773       if (!N0.hasOneUse()) {
8774         if (N0.getOpcode() == ISD::AND) {
8775           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
8776           EVT LoadResultTy = AndC->getValueType(0);
8777           EVT ExtVT;
8778           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
8779             DoXform = false;
8780         }
8781       }
8782       if (DoXform)
8783         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8784                                           ISD::ZERO_EXTEND, SetCCs, TLI);
8785       if (DoXform) {
8786         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
8787                                          LN00->getChain(), LN00->getBasePtr(),
8788                                          LN00->getMemoryVT(),
8789                                          LN00->getMemOperand());
8790         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8791         Mask = Mask.zext(VT.getSizeInBits());
8792         SDLoc DL(N);
8793         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8794                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8795         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8796         bool NoReplaceTruncAnd = !N0.hasOneUse();
8797         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8798         CombineTo(N, And);
8799         // If N0 has multiple uses, change other uses as well.
8800         if (NoReplaceTruncAnd) {
8801           SDValue TruncAnd =
8802               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8803           CombineTo(N0.getNode(), TruncAnd);
8804         }
8805         if (NoReplaceTrunc) {
8806           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8807         } else {
8808           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8809                                       LN00->getValueType(0), ExtLoad);
8810           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8811         }
8812         return SDValue(N,0); // Return N so it doesn't get rechecked!
8813       }
8814     }
8815   }
8816 
8817   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8818   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8819   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
8820     return ZExtLoad;
8821 
8822   // Try to simplify (zext (zextload x)).
8823   if (SDValue foldedExt = tryToFoldExtOfExtload(
8824           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
8825     return foldedExt;
8826 
8827   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8828     return V;
8829 
8830   if (N0.getOpcode() == ISD::SETCC) {
8831     // Only do this before legalize for now.
8832     if (!LegalOperations && VT.isVector() &&
8833         N0.getValueType().getVectorElementType() == MVT::i1) {
8834       EVT N00VT = N0.getOperand(0).getValueType();
8835       if (getSetCCResultType(N00VT) == N0.getValueType())
8836         return SDValue();
8837 
8838       // We know that the # elements of the results is the same as the #
8839       // elements of the compare (and the # elements of the compare result for
8840       // that matter). Check to see that they are the same size. If so, we know
8841       // that the element size of the sext'd result matches the element size of
8842       // the compare operands.
8843       SDLoc DL(N);
8844       SDValue VecOnes = DAG.getConstant(1, DL, VT);
8845       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
8846         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
8847         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
8848                                      N0.getOperand(1), N0.getOperand(2));
8849         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
8850       }
8851 
8852       // If the desired elements are smaller or larger than the source
8853       // elements we can use a matching integer vector type and then
8854       // truncate/sign extend.
8855       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8856       SDValue VsetCC =
8857           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
8858                       N0.getOperand(1), N0.getOperand(2));
8859       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
8860                          VecOnes);
8861     }
8862 
8863     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8864     SDLoc DL(N);
8865     if (SDValue SCC = SimplifySelectCC(
8866             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8867             DAG.getConstant(0, DL, VT),
8868             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8869       return SCC;
8870   }
8871 
8872   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
8873   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
8874       isa<ConstantSDNode>(N0.getOperand(1)) &&
8875       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
8876       N0.hasOneUse()) {
8877     SDValue ShAmt = N0.getOperand(1);
8878     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
8879     if (N0.getOpcode() == ISD::SHL) {
8880       SDValue InnerZExt = N0.getOperand(0);
8881       // If the original shl may be shifting out bits, do not perform this
8882       // transformation.
8883       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
8884         InnerZExt.getOperand(0).getValueSizeInBits();
8885       if (ShAmtVal > KnownZeroBits)
8886         return SDValue();
8887     }
8888 
8889     SDLoc DL(N);
8890 
8891     // Ensure that the shift amount is wide enough for the shifted value.
8892     if (VT.getSizeInBits() >= 256)
8893       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
8894 
8895     return DAG.getNode(N0.getOpcode(), DL, VT,
8896                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
8897                        ShAmt);
8898   }
8899 
8900   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8901     return NewVSel;
8902 
8903   return SDValue();
8904 }
8905 
/// Combine an ISD::ANY_EXTEND node: fold redundant extensions, merge the
/// extension through truncates, turn extended loads into extloads, and
/// simplify extensions of setcc results.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c) -> c' for constants and constant build vectors.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  // The inner extend's semantics subsume the anyext, so keep its opcode.
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  // Re-extend (or re-truncate) x directly to the destination type.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask so the AND still clears the same bits in VT.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // When the load has other users, only transform if extending those
    // users (collected in SetCCs) still makes the extload worthwhile.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      // Capture hasOneUse() before CombineTo mutates the use lists.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        // Only the chain result of the old load is still live; rewire it.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        // Other users still want the narrow value: feed them a truncate of
        // the new extending load.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  // i.e. widen an existing extending load directly to VT, keeping its
  // original extension kind.
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // If the result type already matches the target's preferred setcc
      // type there is nothing to improve; bail out.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1),
                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // No combine applied.
  return SDValue();
}
9049 
9050 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9051   unsigned Opcode = N->getOpcode();
9052   SDValue N0 = N->getOperand(0);
9053   SDValue N1 = N->getOperand(1);
9054   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9055 
9056   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9057   if (N0.getOpcode() == Opcode &&
9058       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9059     return N0;
9060 
9061   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9062       N0.getOperand(0).getOpcode() == Opcode) {
9063     // We have an assert, truncate, assert sandwich. Make one stronger assert
9064     // by asserting on the smallest asserted type to the larger source type.
9065     // This eliminates the later assert:
9066     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9067     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9068     SDValue BigA = N0.getOperand(0);
9069     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9070     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9071            "Asserting zero/sign-extended bits to a type larger than the "
9072            "truncated destination does not provide information");
9073 
9074     SDLoc DL(N);
9075     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9076     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9077     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9078                                     BigA.getOperand(0), MinAssertVTVal);
9079     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9080   }
9081 
9082   return SDValue();
9083 }
9084 
9085 /// If the result of a wider load is shifted to right of N  bits and then
9086 /// truncated to a narrower type and where N is a multiple of number of bits of
9087 /// the narrower type, transform it to a narrower load from address + N / num of
9088 /// bits of new type. Also narrow the load if the result is masked with an AND
9089 /// to effectively produce a smaller type. If the result is to be extended, also
9090 /// fold the extension to form a extending load.
/// If the result of a wider load is shifted to right of N  bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. Also narrow the load if the result is masked with an AND
/// to effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form a extending load.
///
/// N itself may be a SIGN_EXTEND_INREG, SRL, AND, or a truncate-like node;
/// the opcode decides what extension type and narrow value type to use.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Treat the SRL node itself as the value being narrowed, so the SRL
    // handling below (keyed on N0) picks it up.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Only a constant shift of a load can be narrowed here.
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      // Low-bit mask: the active width is just the number of trailing ones.
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      // Shifted mask (e.g. 0x0ff0): remember the offset so the AND user can
      // be patched up at the end (see HasShiftedOffset handling below).
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // If the narrowed value is itself a one-use SRL by a constant, fold the
  // shift into the load offset.
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }
      // Note: if the shift amount was NOT a multiple, N0 is still the SRL
      // and the load check below rejects the transform.

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      // (The SRL has exactly one use per the hasOneUse() guard above, so
      // use_begin() is its sole user.)
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Ask the target whether a narrow load/extload of ExtVT at this offset is
  // legal and profitable.
  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // On big-endian targets the narrow value sits at the opposite end of the
  // wide load, so the byte offset is measured from the other side.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  // Build the new base pointer: old base + ShAmt/8 bytes.
  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  // Create the narrower (possibly extending) load.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    // Fall back to VT if the shift amount doesn't fit the shift-amount type.
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means we
    // now need to shift right the mask to match the new load and then shift
    // right the result of the AND.
    const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
    APInt ShiftedMask = Mask.lshr(ShAmt);
    // Patch the original AND node (N) in place to use the narrow load and
    // shifted mask, then re-shift its result left to restore the offset.
    DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT));
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0),
                                  ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted);
    DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC);
  }
  // Return the new loaded value.
  return Result;
}
9297 
9298 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9299   SDValue N0 = N->getOperand(0);
9300   SDValue N1 = N->getOperand(1);
9301   EVT VT = N->getValueType(0);
9302   EVT EVT = cast<VTSDNode>(N1)->getVT();
9303   unsigned VTBits = VT.getScalarSizeInBits();
9304   unsigned EVTBits = EVT.getScalarSizeInBits();
9305 
9306   if (N0.isUndef())
9307     return DAG.getUNDEF(VT);
9308 
9309   // fold (sext_in_reg c1) -> c1
9310   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9311     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9312 
9313   // If the input is already sign extended, just drop the extension.
9314   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9315     return N0;
9316 
9317   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9318   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9319       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9320     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9321                        N0.getOperand(0), N1);
9322 
9323   // fold (sext_in_reg (sext x)) -> (sext x)
9324   // fold (sext_in_reg (aext x)) -> (sext x)
9325   // if x is small enough.
9326   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9327     SDValue N00 = N0.getOperand(0);
9328     if (N00.getScalarValueSizeInBits() <= EVTBits &&
9329         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9330       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9331   }
9332 
9333   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9334   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9335        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9336        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9337       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9338     if (!LegalOperations ||
9339         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9340       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9341                          N0.getOperand(0));
9342   }
9343 
9344   // fold (sext_in_reg (zext x)) -> (sext x)
9345   // iff we are extending the source sign bit.
9346   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9347     SDValue N00 = N0.getOperand(0);
9348     if (N00.getScalarValueSizeInBits() == EVTBits &&
9349         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9350       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9351   }
9352 
9353   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9354   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9355     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9356 
9357   // fold operands of sext_in_reg based on knowledge that the top bits are not
9358   // demanded.
9359   if (SimplifyDemandedBits(SDValue(N, 0)))
9360     return SDValue(N, 0);
9361 
9362   // fold (sext_in_reg (load x)) -> (smaller sextload x)
9363   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9364   if (SDValue NarrowLoad = ReduceLoadWidth(N))
9365     return NarrowLoad;
9366 
9367   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9368   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9369   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9370   if (N0.getOpcode() == ISD::SRL) {
9371     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9372       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9373         // We can turn this into an SRA iff the input to the SRL is already sign
9374         // extended enough.
9375         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9376         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9377           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9378                              N0.getOperand(0), N0.getOperand(1));
9379       }
9380   }
9381 
9382   // fold (sext_inreg (extload x)) -> (sextload x)
9383   // If sextload is not supported by target, we can only do the combine when
9384   // load has one use. Doing otherwise can block folding the extload with other
9385   // extends that the target does support.
9386   if (ISD::isEXTLoad(N0.getNode()) &&
9387       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9388       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9389       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9390         N0.hasOneUse()) ||
9391        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9392     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9393     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9394                                      LN0->getChain(),
9395                                      LN0->getBasePtr(), EVT,
9396                                      LN0->getMemOperand());
9397     CombineTo(N, ExtLoad);
9398     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9399     AddToWorklist(ExtLoad.getNode());
9400     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9401   }
9402   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9403   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9404       N0.hasOneUse() &&
9405       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9406       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9407        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9408     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9409     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9410                                      LN0->getChain(),
9411                                      LN0->getBasePtr(), EVT,
9412                                      LN0->getMemOperand());
9413     CombineTo(N, ExtLoad);
9414     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9415     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9416   }
9417 
9418   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9419   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9420     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9421                                            N0.getOperand(1), false))
9422       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9423                          BSwap, N1);
9424   }
9425 
9426   return SDValue();
9427 }
9428 
9429 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9430   SDValue N0 = N->getOperand(0);
9431   EVT VT = N->getValueType(0);
9432 
9433   if (N0.isUndef())
9434     return DAG.getUNDEF(VT);
9435 
9436   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9437     return Res;
9438 
9439   return SDValue();
9440 }
9441 
9442 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9443   SDValue N0 = N->getOperand(0);
9444   EVT VT = N->getValueType(0);
9445 
9446   if (N0.isUndef())
9447     return DAG.getUNDEF(VT);
9448 
9449   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9450     return Res;
9451 
9452   return SDValue();
9453 }
9454 
9455 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
9456   SDValue N0 = N->getOperand(0);
9457   EVT VT = N->getValueType(0);
9458   bool isLE = DAG.getDataLayout().isLittleEndian();
9459 
9460   // noop truncate
9461   if (N0.getValueType() == N->getValueType(0))
9462     return N0;
9463 
9464   // fold (truncate (truncate x)) -> (truncate x)
9465   if (N0.getOpcode() == ISD::TRUNCATE)
9466     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9467 
9468   // fold (truncate c1) -> c1
9469   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9470     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
9471     if (C.getNode() != N)
9472       return C;
9473   }
9474 
9475   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9476   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9477       N0.getOpcode() == ISD::SIGN_EXTEND ||
9478       N0.getOpcode() == ISD::ANY_EXTEND) {
9479     // if the source is smaller than the dest, we still need an extend.
9480     if (N0.getOperand(0).getValueType().bitsLT(VT))
9481       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9482     // if the source is larger than the dest, than we just need the truncate.
9483     if (N0.getOperand(0).getValueType().bitsGT(VT))
9484       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9485     // if the source and dest are the same type, we can drop both the extend
9486     // and the truncate.
9487     return N0.getOperand(0);
9488   }
9489 
9490   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
9491   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9492     return SDValue();
9493 
9494   // Fold extract-and-trunc into a narrow extract. For example:
9495   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9496   //   i32 y = TRUNCATE(i64 x)
9497   //        -- becomes --
9498   //   v16i8 b = BITCAST (v2i64 val)
9499   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9500   //
9501   // Note: We only run this optimization after type legalization (which often
9502   // creates this pattern) and before operation legalization after which
9503   // we need to be more careful about the vector instructions that we generate.
9504   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9505       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9506     EVT VecTy = N0.getOperand(0).getValueType();
9507     EVT ExTy = N0.getValueType();
9508     EVT TrTy = N->getValueType(0);
9509 
9510     unsigned NumElem = VecTy.getVectorNumElements();
9511     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9512 
9513     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9514     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9515 
9516     SDValue EltNo = N0->getOperand(1);
9517     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9518       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9519       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
9520       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9521 
9522       SDLoc DL(N);
9523       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9524                          DAG.getBitcast(NVT, N0.getOperand(0)),
9525                          DAG.getConstant(Index, DL, IndexTy));
9526     }
9527   }
9528 
9529   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9530   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9531     EVT SrcVT = N0.getValueType();
9532     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9533         TLI.isTruncateFree(SrcVT, VT)) {
9534       SDLoc SL(N0);
9535       SDValue Cond = N0.getOperand(0);
9536       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9537       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9538       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9539     }
9540   }
9541 
9542   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
9543   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9544       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9545       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9546     SDValue Amt = N0.getOperand(1);
9547     KnownBits Known;
9548     DAG.computeKnownBits(Amt, Known);
9549     unsigned Size = VT.getScalarSizeInBits();
9550     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9551       SDLoc SL(N);
9552       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9553 
9554       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9555       if (AmtVT != Amt.getValueType()) {
9556         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9557         AddToWorklist(Amt.getNode());
9558       }
9559       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9560     }
9561   }
9562 
9563   // Fold a series of buildvector, bitcast, and truncate if possible.
9564   // For example fold
9565   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9566   //   (2xi32 (buildvector x, y)).
9567   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9568       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9569       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9570       N0.getOperand(0).hasOneUse()) {
9571     SDValue BuildVect = N0.getOperand(0);
9572     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9573     EVT TruncVecEltTy = VT.getVectorElementType();
9574 
9575     // Check that the element types match.
9576     if (BuildVectEltTy == TruncVecEltTy) {
9577       // Now we only need to compute the offset of the truncated elements.
9578       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
9579       unsigned TruncVecNumElts = VT.getVectorNumElements();
9580       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9581 
9582       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9583              "Invalid number of elements");
9584 
9585       SmallVector<SDValue, 8> Opnds;
9586       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9587         Opnds.push_back(BuildVect.getOperand(i));
9588 
9589       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9590     }
9591   }
9592 
9593   // See if we can simplify the input to this truncate through knowledge that
9594   // only the low bits are being used.
9595   // For example "trunc (or (shl x, 8), y)" // -> trunc y
9596   // Currently we only perform this optimization on scalars because vectors
9597   // may have different active low bits.
9598   if (!VT.isVector()) {
9599     APInt Mask =
9600         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9601     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9602       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9603   }
9604 
9605   // fold (truncate (load x)) -> (smaller load x)
9606   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9607   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9608     if (SDValue Reduced = ReduceLoadWidth(N))
9609       return Reduced;
9610 
9611     // Handle the case where the load remains an extending load even
9612     // after truncation.
9613     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9614       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9615       if (!LN0->isVolatile() &&
9616           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9617         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9618                                          VT, LN0->getChain(), LN0->getBasePtr(),
9619                                          LN0->getMemoryVT(),
9620                                          LN0->getMemOperand());
9621         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9622         return NewLoad;
9623       }
9624     }
9625   }
9626 
9627   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
9628   // where ... are all 'undef'.
9629   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
9630     SmallVector<EVT, 8> VTs;
9631     SDValue V;
9632     unsigned Idx = 0;
9633     unsigned NumDefs = 0;
9634 
9635     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
9636       SDValue X = N0.getOperand(i);
9637       if (!X.isUndef()) {
9638         V = X;
9639         Idx = i;
9640         NumDefs++;
9641       }
9642       // Stop if more than one members are non-undef.
9643       if (NumDefs > 1)
9644         break;
9645       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
9646                                      VT.getVectorElementType(),
9647                                      X.getValueType().getVectorNumElements()));
9648     }
9649 
9650     if (NumDefs == 0)
9651       return DAG.getUNDEF(VT);
9652 
9653     if (NumDefs == 1) {
9654       assert(V.getNode() && "The single defined operand is empty!");
9655       SmallVector<SDValue, 8> Opnds;
9656       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
9657         if (i != Idx) {
9658           Opnds.push_back(DAG.getUNDEF(VTs[i]));
9659           continue;
9660         }
9661         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
9662         AddToWorklist(NV.getNode());
9663         Opnds.push_back(NV);
9664       }
9665       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
9666     }
9667   }
9668 
9669   // Fold truncate of a bitcast of a vector to an extract of the low vector
9670   // element.
9671   //
9672   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
9673   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
9674     SDValue VecSrc = N0.getOperand(0);
9675     EVT SrcVT = VecSrc.getValueType();
9676     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
9677         (!LegalOperations ||
9678          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
9679       SDLoc SL(N);
9680 
9681       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
9682       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
9683       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
9684                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
9685     }
9686   }
9687 
9688   // Simplify the operands using demanded-bits information.
9689   if (!VT.isVector() &&
9690       SimplifyDemandedBits(SDValue(N, 0)))
9691     return SDValue(N, 0);
9692 
9693   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
9694   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
9695   // When the adde's carry is not used.
9696   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
9697       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
9698       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
9699     SDLoc SL(N);
9700     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9701     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9702     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
9703     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
9704   }
9705 
9706   // fold (truncate (extract_subvector(ext x))) ->
9707   //      (extract_subvector x)
9708   // TODO: This can be generalized to cover cases where the truncate and extract
9709   // do not fully cancel each other out.
9710   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9711     SDValue N00 = N0.getOperand(0);
9712     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
9713         N00.getOpcode() == ISD::ZERO_EXTEND ||
9714         N00.getOpcode() == ISD::ANY_EXTEND) {
9715       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
9716           VT.getVectorElementType())
9717         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
9718                            N00.getOperand(0), N0.getOperand(1));
9719     }
9720   }
9721 
9722   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9723     return NewVSel;
9724 
9725   return SDValue();
9726 }
9727 
9728 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9729   SDValue Elt = N->getOperand(i);
9730   if (Elt.getOpcode() != ISD::MERGE_VALUES)
9731     return Elt.getNode();
9732   return Elt.getOperand(Elt.getResNo()).getNode();
9733 }
9734 
/// Fold build_pair (load, load) -> one wider load,
/// if the load locations are consecutive in memory.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Each BUILD_PAIR element may be wrapped in MERGE_VALUES; peer through it
  // to find the underlying loads (null if an element is not a load).
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR always has the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness: after this swap LD1 is the load whose
  // data occupies the lower addresses of the combined value.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // Both elements must be plain (non-extending) single-use loads from the
  // same address space.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must start exactly LD1Bytes past LD1 and neither may be volatile.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only fold if LD1's alignment already satisfies the wider type's ABI
    // alignment and a load of VT is (or can still become) legal.
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
9768 
9769 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
9770   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
9771   // and Lo parts; on big-endian machines it doesn't.
9772   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
9773 }
9774 
9775 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
9776                                     const TargetLowering &TLI) {
9777   // If this is not a bitcast to an FP type or if the target doesn't have
9778   // IEEE754-compliant FP logic, we're done.
9779   EVT VT = N->getValueType(0);
9780   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
9781     return SDValue();
9782 
9783   // TODO: Handle cases where the integer constant is a different scalar
9784   // bitwidth to the FP.
9785   SDValue N0 = N->getOperand(0);
9786   EVT SourceVT = N0.getValueType();
9787   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
9788     return SDValue();
9789 
9790   unsigned FPOpcode;
9791   APInt SignMask;
9792   switch (N0.getOpcode()) {
9793   case ISD::AND:
9794     FPOpcode = ISD::FABS;
9795     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
9796     break;
9797   case ISD::XOR:
9798     FPOpcode = ISD::FNEG;
9799     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
9800     break;
9801   case ISD::OR:
9802     FPOpcode = ISD::FABS;
9803     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
9804     break;
9805   default:
9806     return SDValue();
9807   }
9808 
9809   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
9810   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
9811   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
9812   //   fneg (fabs X)
9813   SDValue LogicOp0 = N0.getOperand(0);
9814   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
9815   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
9816       LogicOp0.getOpcode() == ISD::BITCAST &&
9817       LogicOp0.getOperand(0).getValueType() == VT) {
9818     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
9819     NumFPLogicOpsConv++;
9820     if (N0.getOpcode() == ISD::OR)
9821       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
9822     return FPOp;
9823   }
9824 
9825   return SDValue();
9826 }
9827 
/// Combine a BITCAST node: fold bitcasts of constants, loads, FP sign
/// operations, BUILD_PAIRs of consecutive loads, and shuffles of bitcasts.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // bitcast of undef is undef of the new type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, since we might create an illegal
  // scalar type. Even if we knew we wouldn't create an illegal scalar type
  // we can only do this before legalize ops, since the target maybe
  // depending on the bitcast.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      // Guard against getBitcast returning this same node (no progress).
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only fold when the target says an access of VT at the original
    // alignment is both allowed and fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect users of the old load's chain to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    // ppc_fp128 is two doubles; flip/mask the sign bit of each 64-bit half.
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // For fabs, only flip the sign if it is currently set: AND the Hi
        // half's sign bit into the flip mask.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // ppc_fp128 case: xor the two bitcasts, extract the Hi half's sign
      // difference, and apply it to both halves via BUILD_PAIR.
      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Scalar case: take the sign bit from X and all other bits from cst.
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // The new type has MaskScale times as many (narrower) elements, so each
    // original mask entry expands to MaskScale consecutive entries.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the scaled mask is not legal, try the commuted form before giving up.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
10081 
10082 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10083   EVT VT = N->getValueType(0);
10084   return CombineConsecutiveLoads(N, VT);
10085 }
10086 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Returns a BUILD_VECTOR whose elements are the source constants re-encoded
/// as DstEltVT values (recursing through an integer representation for FP).
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    // Pack NumInputsPerOutput source elements into each destination element.
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian the first source element is the low part of the
        // destination, so accumulate the group in reverse order there.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // A destination element is undef only if every packed source was undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    // An undef input expands to NumOutputsPerInput undef outputs.
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Emit the pieces low-to-high by repeatedly truncating and shifting.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
10201 
10202 static bool isContractable(SDNode *N) {
10203   SDNodeFlags F = N->getFlags();
10204   return F.hasAllowContract() || F.hasAllowReassociation();
10205 }
10206 
/// Try to perform FMA combining on a given FADD node: rewrite
/// (fadd (fmul x, y), z) and related patterns (commuted operands, operands
/// seen through FP_EXTEND, and operands that are already fused) into a
/// single fused multiply-add using the target's preferred fused opcode
/// (FMAD or FMA). Returns the fused node, or an empty SDValue if no
/// combine applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  // Fusion is permitted either globally (unsafe FP math) or per-node via
  // the contract/reassoc fast-math flags.
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets prefer forming FMAs later, in the machine combiner; do not
  // compete with that here.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  // Aggressive targets allow fusing a multiply that has additional uses.
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    // Only fold if the target says extending through the fused op is free
    // from the narrower source type.
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    // Requires one-use on both the fma and its inner fmul, since both are
    // re-created with a different addend.
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }


    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  // No pattern matched; leave the FADD alone.
  return SDValue();
}
10418 
/// Try to perform FMA combining on a given FSUB node: rewrite
/// (fsub (fmul x, y), z) and related patterns (commuted/negated operands,
/// operands seen through FP_EXTEND, and operands that are already fused)
/// into a fused multiply-add with appropriate FNEG nodes inserted. Returns
/// the fused node, or an empty SDValue if no combine applies.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  // Fusion is permitted either globally (unsafe FP math) or per-node via
  // the contract/reassoc fast-math flags.
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets prefer forming FMAs later, in the machine combiner; do not
  // compete with that here.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  // Aggressive targets allow fusing a multiply that has additional uses.
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0, Flags);
  }

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    // Only fold if the target says extending through the fused op is free
    // from the narrower source type.
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0, Flags);
    }
  }

  // fold (fsub (fpext (fneg (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x)), (fpext y), z)
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1), Flags), Flags);
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0, Flags), Flags);
    }


    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0, Flags), Flags);
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0, Flags), Flags);
      }
    }
  }

  // No pattern matched; leave the FSUB alone.
  return SDValue();
}
10707 
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition). Returns the fused node, or an empty
/// SDValue if no combine applies.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  const SDNodeFlags Flags = N->getFlags();

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  // Unlike the FADD/FSUB combines above, this one additionally requires the
  // global fusion/unsafe-math options, not just per-node flags.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  // Aggressive targets allow the transform even when the FADD/FSUB operand
  // has additional uses.
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      // NOTE(review): the 'true' argument presumably allows undef lanes when
      // matching a splat constant — confirm against isConstOrConstSplatFP.
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
    }
    return SDValue();
  };

  // Try the FADD pattern with either FMUL operand as the addition.
  if (SDValue FMA = FuseFADD(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0, Flags))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      // Constant on the left of the FSUB: (fsub +/-1.0, x1).
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y, Flags);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
      // Constant on the right of the FSUB: (fsub x0, +/-1.0).
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
      }
    }
    return SDValue();
  };

  // Try the FSUB pattern with either FMUL operand as the subtraction.
  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
    return FMA;

  return SDValue();
}
10802 
10803 SDValue DAGCombiner::visitFADD(SDNode *N) {
10804   SDValue N0 = N->getOperand(0);
10805   SDValue N1 = N->getOperand(1);
10806   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
10807   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
10808   EVT VT = N->getValueType(0);
10809   SDLoc DL(N);
10810   const TargetOptions &Options = DAG.getTarget().Options;
10811   const SDNodeFlags Flags = N->getFlags();
10812 
10813   // fold vector ops
10814   if (VT.isVector())
10815     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10816       return FoldedVOp;
10817 
10818   // fold (fadd c1, c2) -> c1 + c2
10819   if (N0CFP && N1CFP)
10820     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
10821 
10822   // canonicalize constant to RHS
10823   if (N0CFP && !N1CFP)
10824     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
10825 
10826   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
10827   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
10828   if (N1C && N1C->isZero())
10829     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
10830       return N0;
10831 
10832   if (SDValue NewSel = foldBinOpIntoSelect(N))
10833     return NewSel;
10834 
10835   // fold (fadd A, (fneg B)) -> (fsub A, B)
10836   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10837       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
10838     return DAG.getNode(ISD::FSUB, DL, VT, N0,
10839                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10840 
10841   // fold (fadd (fneg A), B) -> (fsub B, A)
10842   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10843       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
10844     return DAG.getNode(ISD::FSUB, DL, VT, N1,
10845                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
10846 
10847   auto isFMulNegTwo = [](SDValue FMul) {
10848     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
10849       return false;
10850     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
10851     return C && C->isExactlyValue(-2.0);
10852   };
10853 
10854   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
10855   if (isFMulNegTwo(N0)) {
10856     SDValue B = N0.getOperand(0);
10857     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
10858     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
10859   }
10860   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
10861   if (isFMulNegTwo(N1)) {
10862     SDValue B = N1.getOperand(0);
10863     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
10864     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
10865   }
10866 
10867   // No FP constant should be created after legalization as Instruction
10868   // Selection pass has a hard time dealing with FP constants.
10869   bool AllowNewConst = (Level < AfterLegalizeDAG);
10870 
10871   // If 'unsafe math' or nnan is enabled, fold lots of things.
10872   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
10873     // If allowed, fold (fadd (fneg x), x) -> 0.0
10874     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
10875       return DAG.getConstantFP(0.0, DL, VT);
10876 
10877     // If allowed, fold (fadd x, (fneg x)) -> 0.0
10878     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
10879       return DAG.getConstantFP(0.0, DL, VT);
10880   }
10881 
10882   // If 'unsafe math' or reassoc and nsz, fold lots of things.
10883   // TODO: break out portions of the transformations below for which Unsafe is
10884   //       considered and which do not require both nsz and reassoc
10885   if ((Options.UnsafeFPMath ||
10886        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
10887       AllowNewConst) {
10888     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
10889     if (N1CFP && N0.getOpcode() == ISD::FADD &&
10890         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10891       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
10892       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
10893     }
10894 
10895     // We can fold chains of FADD's of the same value into multiplications.
10896     // This transform is not safe in general because we are reducing the number
10897     // of rounding steps.
10898     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
10899       if (N0.getOpcode() == ISD::FMUL) {
10900         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10901         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
10902 
10903         // (fadd (fmul x, c), x) -> (fmul x, c+1)
10904         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
10905           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10906                                        DAG.getConstantFP(1.0, DL, VT), Flags);
10907           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
10908         }
10909 
10910         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
10911         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
10912             N1.getOperand(0) == N1.getOperand(1) &&
10913             N0.getOperand(0) == N1.getOperand(0)) {
10914           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10915                                        DAG.getConstantFP(2.0, DL, VT), Flags);
10916           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
10917         }
10918       }
10919 
10920       if (N1.getOpcode() == ISD::FMUL) {
10921         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10922         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
10923 
10924         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
10925         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
10926           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10927                                        DAG.getConstantFP(1.0, DL, VT), Flags);
10928           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
10929         }
10930 
10931         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
10932         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
10933             N0.getOperand(0) == N0.getOperand(1) &&
10934             N1.getOperand(0) == N0.getOperand(0)) {
10935           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10936                                        DAG.getConstantFP(2.0, DL, VT), Flags);
10937           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
10938         }
10939       }
10940 
10941       if (N0.getOpcode() == ISD::FADD) {
10942         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10943         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
10944         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
10945             (N0.getOperand(0) == N1)) {
10946           return DAG.getNode(ISD::FMUL, DL, VT,
10947                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
10948         }
10949       }
10950 
10951       if (N1.getOpcode() == ISD::FADD) {
10952         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10953         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
10954         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
10955             N1.getOperand(0) == N0) {
10956           return DAG.getNode(ISD::FMUL, DL, VT,
10957                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
10958         }
10959       }
10960 
10961       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
10962       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
10963           N0.getOperand(0) == N0.getOperand(1) &&
10964           N1.getOperand(0) == N1.getOperand(1) &&
10965           N0.getOperand(0) == N1.getOperand(0)) {
10966         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
10967                            DAG.getConstantFP(4.0, DL, VT), Flags);
10968       }
10969     }
10970   } // enable-unsafe-fp-math
10971 
10972   // FADD -> FMA combines:
10973   if (SDValue Fused = visitFADDForFMACombine(N)) {
10974     AddToWorklist(Fused.getNode());
10975     return Fused;
10976   }
10977   return SDValue();
10978 }
10979 
/// Try to simplify an FSUB node. The folds are attempted in a fixed order;
/// earlier (cheaper / more general) folds take precedence.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Match scalar FP constants or splats of one constant (the 'true' argument
  // allows undef lanes in a splat).
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  // Re-emitting the node with two constant operands lets getNode fold it.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // Subtracting +0.0 is always exact. Subtracting -0.0 would turn a -0.0
  // result into +0.0, so that case additionally requires unsafe math or nsz.
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Invalid for NaN inputs (NaN - NaN == NaN), hence nnan/unsafe.
    if (Options.UnsafeFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  // Also fires for +0.0 when signed zeros may be ignored.
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() ||
        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
      // Prefer a free negation of an existing expression; otherwise emit an
      // explicit FNEG if that is legal (or we are before legalization).
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // Reassociation folds: these discard an intermediate rounding step, so
  // they need reassoc+nsz (or global unsafe math).
  if ((Options.UnsafeFPMath ||
      (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
      && N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
11051 
/// Try to simplify an FMUL node. Folds are attempted in a fixed order;
/// the canonicalization (constant to RHS) enables the N1-based folds below.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Match scalar FP constants or splats of one constant (undef lanes OK).
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  // Re-emitting with two constant operands lets getNode constant fold.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // A*0 is not always +0: Inf/NaN * 0 is NaN and (-x)*0 is -0.0, hence the
  // nnan+nsz (or unsafe math) requirement.
  if (Options.UnsafeFPMath ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  // Reassociation folds (change the order of rounding).
  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  // Only fold when at least one side is strictly cheaper negated (== 2),
  // so this cannot ping-pong with the inverse transform.
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so 'Select' is the select and 'X' is the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Only handle a compare of X itself against (exactly) zero.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      // For 'less than' compares, swap the select arms so the code below
      // only has to reason about the 'greater than' form.
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
11193 
/// Try to simplify an FMA node (N0 * N1 + N2).
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  // Scalar-only constant matches; vector splats are handled separately via
  // isConstantFPBuildVectorOrConstantFP below.
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  // Re-emitting the node with all-constant operands lets getNode fold it.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (fma 0, x, y) / (fma x, 0, y) -> y. Unsafe only: the dropped multiply
  // could have produced NaN (0 * Inf) or affected the sign of zero.
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, x, y) -> (fadd x, y); (fma x, 1, y) -> (fadd x, y).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // Reassociation folds that merge constants (change rounding).
  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x, -K, y
    // Only when the negated constant is representable as a legal constant,
    // or the original constant was single-use and not legal anyway.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
11298 
11299 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11300 // reciprocal.
11301 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11302 // Notice that this is not always beneficial. One reason is different targets
11303 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11304 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11305 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11306 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11307   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11308   const SDNodeFlags Flags = N->getFlags();
11309   if (!UnsafeMath && !Flags.hasAllowReciprocal())
11310     return SDValue();
11311 
11312   // Skip if current node is a reciprocal.
11313   SDValue N0 = N->getOperand(0);
11314   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11315   if (N0CFP && N0CFP->isExactlyValue(1.0))
11316     return SDValue();
11317 
11318   // Exit early if the target does not want this transform or if there can't
11319   // possibly be enough uses of the divisor to make the transform worthwhile.
11320   SDValue N1 = N->getOperand(1);
11321   unsigned MinUses = TLI.combineRepeatedFPDivisors();
11322   if (!MinUses || N1->use_size() < MinUses)
11323     return SDValue();
11324 
11325   // Find all FDIV users of the same divisor.
11326   // Use a set because duplicates may be present in the user list.
11327   SetVector<SDNode *> Users;
11328   for (auto *U : N1->uses()) {
11329     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11330       // This division is eligible for optimization only if global unsafe math
11331       // is enabled or if this division allows reciprocal formation.
11332       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11333         Users.insert(U);
11334     }
11335   }
11336 
11337   // Now that we have the actual number of divisor uses, make sure it meets
11338   // the minimum threshold specified by the target.
11339   if (Users.size() < MinUses)
11340     return SDValue();
11341 
11342   EVT VT = N->getValueType(0);
11343   SDLoc DL(N);
11344   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11345   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11346 
11347   // Dividend / Divisor -> Dividend * Reciprocal
11348   for (auto *U : Users) {
11349     SDValue Dividend = U->getOperand(0);
11350     if (Dividend != FPOne) {
11351       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11352                                     Reciprocal, Flags);
11353       CombineTo(U, NewNode);
11354     } else if (U != Reciprocal.getNode()) {
11355       // In the absence of fast-math-flags, this user node is always the
11356       // same node as Reciprocal, but with FMF they may be different nodes.
11357       CombineTo(U, Reciprocal);
11358     }
11359   }
11360   return SDValue(N, 0);  // N was replaced.
11361 }
11362 
/// Try to simplify an FDIV node, including turning divides into multiplies
/// by (estimated) reciprocals when precision loss is permitted.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  // Re-emitting with both operands constant lets getNode constant fold.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // The folds below trade precision for speed and are only valid with
  // global unsafe math or the 'arcp' (allow-reciprocal) flag.
  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpext(sqrt(y)) -> x * fpext(rsqrt(y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpround(sqrt(y)) -> x * fpround(rsqrt(y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // If several FDIVs share this divisor, rewrite them all as multiplies by
  // a single computed reciprocal.
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
11475 
11476 SDValue DAGCombiner::visitFREM(SDNode *N) {
11477   SDValue N0 = N->getOperand(0);
11478   SDValue N1 = N->getOperand(1);
11479   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11480   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11481   EVT VT = N->getValueType(0);
11482 
11483   // fold (frem c1, c2) -> fmod(c1,c2)
11484   if (N0CFP && N1CFP)
11485     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11486 
11487   if (SDValue NewSel = foldBinOpIntoSelect(N))
11488     return NewSel;
11489 
11490   return SDValue();
11491 }
11492 
11493 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11494   SDNodeFlags Flags = N->getFlags();
11495   if (!DAG.getTarget().Options.UnsafeFPMath &&
11496       !Flags.hasApproximateFuncs())
11497     return SDValue();
11498 
11499   SDValue N0 = N->getOperand(0);
11500   if (TLI.isFsqrtCheap(N0, DAG))
11501     return SDValue();
11502 
11503   // FSQRT nodes have flags that propagate to the created nodes.
11504   return buildSqrtEstimate(N0, Flags);
11505 }
11506 
11507 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11508 /// copysign(x, fp_round(y)) -> copysign(x, y)
11509 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11510   SDValue N1 = N->getOperand(1);
11511   if ((N1.getOpcode() == ISD::FP_EXTEND ||
11512        N1.getOpcode() == ISD::FP_ROUND)) {
11513     // Do not optimize out type conversion of f128 type yet.
11514     // For some targets like x86_64, configuration is changed to keep one f128
11515     // value in one SSE register, but instruction selection cannot handle
11516     // FCOPYSIGN on SSE registers yet.
11517     EVT N1VT = N1->getValueType(0);
11518     EVT N1Op0VT = N1->getOperand(0).getValueType();
11519     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11520   }
11521   return false;
11522 }
11523 
11524 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11525   SDValue N0 = N->getOperand(0);
11526   SDValue N1 = N->getOperand(1);
11527   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11528   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11529   EVT VT = N->getValueType(0);
11530 
11531   if (N0CFP && N1CFP) // Constant fold
11532     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11533 
11534   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
11535     const APFloat &V = N1C->getValueAPF();
11536     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
11537     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11538     if (!V.isNegative()) {
11539       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11540         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11541     } else {
11542       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11543         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11544                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11545     }
11546   }
11547 
11548   // copysign(fabs(x), y) -> copysign(x, y)
11549   // copysign(fneg(x), y) -> copysign(x, y)
11550   // copysign(copysign(x,z), y) -> copysign(x, y)
11551   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11552       N0.getOpcode() == ISD::FCOPYSIGN)
11553     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11554 
11555   // copysign(x, abs(y)) -> abs(x)
11556   if (N1.getOpcode() == ISD::FABS)
11557     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11558 
11559   // copysign(x, copysign(y,z)) -> copysign(x, z)
11560   if (N1.getOpcode() == ISD::FCOPYSIGN)
11561     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11562 
11563   // copysign(x, fp_extend(y)) -> copysign(x, y)
11564   // copysign(x, fp_round(y)) -> copysign(x, y)
11565   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11566     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11567 
11568   return SDValue();
11569 }
11570 
/// Combine an FPOW node whose exponent is a (splat) FP constant:
/// approximate x ** (1/3) as FCBRT and x ** (1/4) as sqrt(sqrt(x)), when the
/// required fast-math flags and target support are present.
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // The transform is not exact for special values:
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
  }

  // Try to convert x ** (1/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
    // Again, not exact for special values:
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (DAG.getMachineFunction().getFunction().optForSize())
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
    return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
  }

  return SDValue();
}
11636 
11637 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11638                                const TargetLowering &TLI) {
11639   // This optimization is guarded by a function attribute because it may produce
11640   // unexpected results. Ie, programs may be relying on the platform-specific
11641   // undefined behavior when the float-to-int conversion overflows.
11642   const Function &F = DAG.getMachineFunction().getFunction();
11643   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11644   if (StrictOverflow.getValueAsString().equals("false"))
11645     return SDValue();
11646 
11647   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11648   // replacing casts with a libcall. We also must be allowed to ignore -0.0
11649   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
11650   // conversions would return +0.0.
11651   // FIXME: We should be able to use node-level FMF here.
11652   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11653   EVT VT = N->getValueType(0);
11654   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11655       !DAG.getTarget().Options.NoSignedZerosFPMath)
11656     return SDValue();
11657 
11658   // fptosi/fptoui round towards zero, so converting from FP to integer and
11659   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11660   SDValue N0 = N->getOperand(0);
11661   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11662       N0.getOperand(0).getValueType() == VT)
11663     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11664 
11665   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11666       N0.getOperand(0).getValueType() == VT)
11667     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11668 
11669   return SDValue();
11670 }
11671 
/// Combine SINT_TO_FP: constant-fold, prefer UINT_TO_FP when the sign bit is
/// known zero, turn setcc-fed conversions into SELECT_CC of FP constants, and
/// recognize int->fp->int round trips as FTRUNC.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // [s]itofp (fptosi X) --> ftrunc X, when legal (see foldFPToIntToFP).
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
11728 
/// Combine UINT_TO_FP: constant-fold, prefer SINT_TO_FP when the sign bit is
/// known zero, turn setcc-fed conversions into SELECT_CC of FP constants, and
/// recognize int->fp->int round trips as FTRUNC.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
    // (an unsigned i1 true converts to 1.0, unlike the signed -1.0 case).
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // [u]itofp (fptoui X) --> ftrunc X, when legal (see foldFPToIntToFP).
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
11770 
11771 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
11772 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
11773   SDValue N0 = N->getOperand(0);
11774   EVT VT = N->getValueType(0);
11775 
11776   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
11777     return SDValue();
11778 
11779   SDValue Src = N0.getOperand(0);
11780   EVT SrcVT = Src.getValueType();
11781   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
11782   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
11783 
11784   // We can safely assume the conversion won't overflow the output range,
11785   // because (for example) (uint8_t)18293.f is undefined behavior.
11786 
11787   // Since we can assume the conversion won't overflow, our decision as to
11788   // whether the input will fit in the float should depend on the minimum
11789   // of the input range and output range.
11790 
11791   // This means this is also safe for a signed input and unsigned output, since
11792   // a negative input would lead to undefined behavior.
11793   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
11794   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
11795   unsigned ActualSize = std::min(InputSize, OutputSize);
11796   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
11797 
11798   // We can only fold away the float conversion if the input range can be
11799   // represented exactly in the float range.
11800   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
11801     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
11802       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
11803                                                        : ISD::ZERO_EXTEND;
11804       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
11805     }
11806     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
11807       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
11808     return DAG.getBitcast(VT, Src);
11809   }
11810   return SDValue();
11811 }
11812 
11813 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
11814   SDValue N0 = N->getOperand(0);
11815   EVT VT = N->getValueType(0);
11816 
11817   // fold (fp_to_sint c1fp) -> c1
11818   if (isConstantFPBuildVectorOrConstantFP(N0))
11819     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
11820 
11821   return FoldIntToFPToInt(N, DAG);
11822 }
11823 
11824 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
11825   SDValue N0 = N->getOperand(0);
11826   EVT VT = N->getValueType(0);
11827 
11828   // fold (fp_to_uint c1fp) -> c1
11829   if (isConstantFPBuildVectorOrConstantFP(N0))
11830     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
11831 
11832   return FoldIntToFPToInt(N, DAG);
11833 }
11834 
/// Combine FP_ROUND: constant-fold, collapse fp_round(fp_extend) and
/// fp_round(fp_round) pairs, and hoist the rounding above a single-use
/// copysign.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // Operand 1 is the 'trunc' flag: 1 means the round preserves the value.
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
11890 
11891 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
11892   SDValue N0 = N->getOperand(0);
11893   EVT VT = N->getValueType(0);
11894   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11895   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11896 
11897   // fold (fp_round_inreg c1fp) -> c1fp
11898   if (N0CFP && isTypeLegal(EVT)) {
11899     SDLoc DL(N);
11900     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
11901     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
11902   }
11903 
11904   return SDValue();
11905 }
11906 
/// Combine FP_EXTEND: constant-fold, look through fp16 conversions and
/// value-preserving rounds, and widen loads into extending loads.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Redirect the load's other users to a value-preserving round of the
    // extended load ('trunc' flag 1), keeping the narrower value available.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
11959 
11960 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
11961   SDValue N0 = N->getOperand(0);
11962   EVT VT = N->getValueType(0);
11963 
11964   // fold (fceil c1) -> fceil(c1)
11965   if (isConstantFPBuildVectorOrConstantFP(N0))
11966     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
11967 
11968   return SDValue();
11969 }
11970 
11971 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
11972   SDValue N0 = N->getOperand(0);
11973   EVT VT = N->getValueType(0);
11974 
11975   // fold (ftrunc c1) -> ftrunc(c1)
11976   if (isConstantFPBuildVectorOrConstantFP(N0))
11977     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
11978 
11979   // fold ftrunc (known rounded int x) -> x
11980   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
11981   // likely to be generated to extract integer from a rounded floating value.
11982   switch (N0.getOpcode()) {
11983   default: break;
11984   case ISD::FRINT:
11985   case ISD::FTRUNC:
11986   case ISD::FNEARBYINT:
11987   case ISD::FFLOOR:
11988   case ISD::FCEIL:
11989     return N0;
11990   }
11991 
11992   return SDValue();
11993 }
11994 
11995 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
11996   SDValue N0 = N->getOperand(0);
11997   EVT VT = N->getValueType(0);
11998 
11999   // fold (ffloor c1) -> ffloor(c1)
12000   if (isConstantFPBuildVectorOrConstantFP(N0))
12001     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12002 
12003   return SDValue();
12004 }
12005 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine FNEG: constant-fold, absorb free negations, flip the sign bit
/// through a bitcast, and push the negation onto an FMUL constant operand.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can be negated at no extra cost, emit the negated form
  // directly instead of an explicit FNEG node.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only if the negated constant is
      // materializable (legal FP immediate or legal ConstantFP).
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
12064 
12065 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12066                             APFloat (*Op)(const APFloat &, const APFloat &)) {
12067   SDValue N0 = N->getOperand(0);
12068   SDValue N1 = N->getOperand(1);
12069   EVT VT = N->getValueType(0);
12070   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12071   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12072 
12073   if (N0CFP && N1CFP) {
12074     const APFloat &C0 = N0CFP->getValueAPF();
12075     const APFloat &C1 = N1CFP->getValueAPF();
12076     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12077   }
12078 
12079   // Canonicalize to constant on RHS.
12080   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12081       !isConstantFPBuildVectorOrConstantFP(N1))
12082     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12083 
12084   return SDValue();
12085 }
12086 
/// Combine FMINNUM via the shared min/max helper using APFloat minnum.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  return visitFMinMax(DAG, N, minnum);
}
12090 
/// Combine FMAXNUM via the shared min/max helper using APFloat maxnum.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  return visitFMinMax(DAG, N, maxnum);
}
12094 
/// Combine FMINIMUM via the shared min/max helper using APFloat minimum.
SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, minimum);
}
12098 
/// Combine FMAXIMUM via the shared min/max helper using APFloat maximum.
SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, maximum);
}
12102 
12103 SDValue DAGCombiner::visitFABS(SDNode *N) {
12104   SDValue N0 = N->getOperand(0);
12105   EVT VT = N->getValueType(0);
12106 
12107   // fold (fabs c1) -> fabs(c1)
12108   if (isConstantFPBuildVectorOrConstantFP(N0))
12109     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12110 
12111   // fold (fabs (fabs x)) -> (fabs x)
12112   if (N0.getOpcode() == ISD::FABS)
12113     return N->getOperand(0);
12114 
12115   // fold (fabs (fneg x)) -> (fabs x)
12116   // fold (fabs (fcopysign x, y)) -> (fabs x)
12117   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12118     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12119 
12120   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
12121   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12122     SDValue Int = N0.getOperand(0);
12123     EVT IntVT = Int.getValueType();
12124     if (IntVT.isInteger() && !IntVT.isVector()) {
12125       APInt SignMask;
12126       if (N0.getValueType().isVector()) {
12127         // For a vector, get a mask such as 0x7f... per scalar element
12128         // and splat it.
12129         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12130         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12131       } else {
12132         // For a scalar, just generate 0x7f...
12133         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12134       }
12135       SDLoc DL(N0);
12136       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12137                         DAG.getConstant(SignMask, DL, IntVT));
12138       AddToWorklist(Int.getNode());
12139       return DAG.getBitcast(N->getValueType(0), Int);
12140     }
12141   }
12142 
12143   return SDValue();
12144 }
12145 
12146 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12147   SDValue Chain = N->getOperand(0);
12148   SDValue N1 = N->getOperand(1);
12149   SDValue N2 = N->getOperand(2);
12150 
12151   // If N is a constant we could fold this into a fallthrough or unconditional
12152   // branch. However that doesn't happen very often in normal code, because
12153   // Instcombine/SimplifyCFG should have handled the available opportunities.
12154   // If we did this folding here, it would be necessary to update the
12155   // MachineBasicBlock CFG, which is awkward.
12156 
12157   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12158   // on the target.
12159   if (N1.getOpcode() == ISD::SETCC &&
12160       TLI.isOperationLegalOrCustom(ISD::BR_CC,
12161                                    N1.getOperand(0).getValueType())) {
12162     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12163                        Chain, N1.getOperand(2),
12164                        N1.getOperand(0), N1.getOperand(1), N2);
12165   }
12166 
12167   if (N1.hasOneUse()) {
12168     if (SDValue NewN1 = rebuildSetCC(N1))
12169       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12170   }
12171 
12172   return SDValue();
12173 }
12174 
/// Try to rewrite a brcond condition that is not already a SETCC into an
/// equivalent SETCC, so the branch can be matched as a compare-and-branch.
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N; re-read the (possibly updated) value from the handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    if (N.getOpcode() != ISD::XOR)
      return N;

    SDNode *TheXor = N.getNode();

    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): isOneConstant(Op0) and Op0.getOpcode() == ISD::XOR can
      // never both hold (a constant node is not an XOR), so Equal appears to
      // always stay false and the br(xor(xor(x,y), 1)) case documented above
      // looks unreachable -- confirm which operand was intended here.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Replace the uses of XOR with SETCC
      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
                          Equal ? ISD::SETEQ : ISD::SETNE);
    }
  }

  return SDValue();
}
12270 
12271 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12272 //
12273 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12274   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12275   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12276 
12277   // If N is a constant we could fold this into a fallthrough or unconditional
12278   // branch. However that doesn't happen very often in normal code, because
12279   // Instcombine/SimplifyCFG should have handled the available opportunities.
12280   // If we did this folding here, it would be necessary to update the
12281   // MachineBasicBlock CFG, which is awkward.
12282 
12283   // Use SimplifySetCC to simplify SETCC's.
12284   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12285                                CondLHS, CondRHS, CC->get(), SDLoc(N),
12286                                false);
12287   if (Simp.getNode()) AddToWorklist(Simp.getNode());
12288 
12289   // fold to a simpler setcc
12290   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12291     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12292                        N->getOperand(0), Simp.getOperand(2),
12293                        Simp.getOperand(0), Simp.getOperand(1),
12294                        N->getOperand(4));
12295 
12296   return SDValue();
12297 }
12298 
12299 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12300 /// and that N may be folded in the load / store addressing mode.
12301 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12302                                     SelectionDAG &DAG,
12303                                     const TargetLowering &TLI) {
12304   EVT VT;
12305   unsigned AS;
12306 
12307   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
12308     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12309       return false;
12310     VT = LD->getMemoryVT();
12311     AS = LD->getAddressSpace();
12312   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
12313     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12314       return false;
12315     VT = ST->getMemoryVT();
12316     AS = ST->getAddressSpace();
12317   } else
12318     return false;
12319 
12320   TargetLowering::AddrMode AM;
12321   if (N->getOpcode() == ISD::ADD) {
12322     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12323     if (Offset)
12324       // [reg +/- imm]
12325       AM.BaseOffs = Offset->getSExtValue();
12326     else
12327       // [reg +/- reg]
12328       AM.Scale = 1;
12329   } else if (N->getOpcode() == ISD::SUB) {
12330     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12331     if (Offset)
12332       // [reg +/- imm]
12333       AM.BaseOffs = -Offset->getSExtValue();
12334     else
12335       // [reg +/- reg]
12336       AM.Scale = 1;
12337   } else
12338     return false;
12339 
12340   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12341                                    VT.getTypeForEVT(*DAG.getContext()), AS);
12342 }
12343 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// \returns true if \p N was replaced (and deleted), false otherwise.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only formed once the DAG has been fully legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N as a load or store, and bail out unless the target supports a
  // pre-inc or pre-dec indexed form for its memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // Remember the swap so BasePtr/Offset can be restored before the rewrite
  // of the other uses below.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.  Folding the pointer into a store whose value depends on that
  // same pointer would create a cycle.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.  The Visited/Worklist state is shared
  // across all the predecessor queries below so repeated walks are cheap.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // A use that is a predecessor of N cannot be rewritten in terms of the
      // result of the new indexed node; just leave it alone.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Only constant add/sub uses of the matching type can be rewritten; if
      // any use does not qualify, give up on rewriting all of them.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the original BasePtr/Offset order for building the indexed node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    // Check #3: folding Ptr into N while another user of Ptr feeds into N
    // would create a cycle.
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Check #4: require at least one use that actually benefits.
  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    // An indexed load produces (value, new pointer, chain); N produced
    // (value, chain).
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    // An indexed store produces (new pointer, chain); N produced (chain).
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-apply the swap so the constant-offset algebra below sees the operands
  // in the order the target reported them.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs of the offset (X) and pointer (Y) terms in each expression.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
12569 
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
///
/// \returns true if \p N was replaced (and deleted), false otherwise.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only formed once the DAG has been fully legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N as a load or store, and bail out unless the target supports a
  // post-inc or post-dec indexed form for its memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The pointer must have another use (the add/sub we want to fold).
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan the other users of Ptr for an add/sub that the target can fold as a
  // post-indexed increment.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Post-inc'ing a frame index or register would require extra copies.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          // No non-addressing use benefits; try the next candidate Op.
          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      // Neither node may reach the other, or folding would create a cycle.
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          // An indexed load produces (value, new pointer, chain); N produced
          // (value, chain).
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          // An indexed store produces (new pointer, chain); N produced
          // (chain).
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
12694 
12695 /// Return the base-pointer arithmetic from an indexed \p LD.
12696 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12697   ISD::MemIndexedMode AM = LD->getAddressingMode();
12698   assert(AM != ISD::UNINDEXED);
12699   SDValue BP = LD->getOperand(1);
12700   SDValue Inc = LD->getOperand(2);
12701 
12702   // Some backends use TargetConstants for load offsets, but don't expect
12703   // TargetConstants in general ADD nodes. We can convert these constants into
12704   // regular Constants (if the constant is not opaque).
12705   assert((Inc.getOpcode() != ISD::TargetConstant ||
12706           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12707          "Cannot split out indexing using opaque target constants");
12708   if (Inc.getOpcode() == ISD::TargetConstant) {
12709     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12710     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12711                           ConstInc->getValueType(0));
12712   }
12713 
12714   unsigned Opc =
12715       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12716   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12717 }
12718 
12719 static inline int numVectorEltsOrZero(EVT T) {
12720   return T.isVector() ? T.getVectorNumElements() : 0;
12721 }
12722 
12723 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
12724   Val = ST->getValue();
12725   EVT STType = Val.getValueType();
12726   EVT STMemType = ST->getMemoryVT();
12727   if (STType == STMemType)
12728     return true;
12729   if (isTypeLegal(STMemType))
12730     return false; // fail.
12731   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
12732       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
12733     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
12734     return true;
12735   }
12736   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
12737       STType.isInteger() && STMemType.isInteger()) {
12738     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
12739     return true;
12740   }
12741   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
12742     Val = DAG.getBitcast(STMemType, Val);
12743     return true;
12744   }
12745   return false; // fail.
12746 }
12747 
12748 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
12749   EVT LDMemType = LD->getMemoryVT();
12750   EVT LDType = LD->getValueType(0);
12751   assert(Val.getValueType() == LDMemType &&
12752          "Attempting to extend value of non-matching type");
12753   if (LDType == LDMemType)
12754     return true;
12755   if (LDMemType.isInteger() && LDType.isInteger()) {
12756     switch (LD->getExtensionType()) {
12757     case ISD::NON_EXTLOAD:
12758       Val = DAG.getBitcast(LDType, Val);
12759       return true;
12760     case ISD::EXTLOAD:
12761       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
12762       return true;
12763     case ISD::SEXTLOAD:
12764       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
12765       return true;
12766     case ISD::ZEXTLOAD:
12767       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
12768       return true;
12769     }
12770   }
12771   return false;
12772 }
12773 
/// If \p LD loads directly from a location a store on its chain just wrote,
/// forward the stored value to the load (possibly inserting truncations,
/// masks, or extensions) and return the replacement; otherwise return an
/// empty SDValue.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || LD->isVolatile())
    return SDValue();
  // Only handle the case where the store is the load's immediate chain
  // predecessor.
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  if (!ST || ST->isVolatile())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  // The two addresses must provably share a base so their byte distance is
  // a known constant.
  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = (STMemType.getStoreSizeInBits() -
              LDMemType.getStoreSizeInBits()) / 8 - Offset;

  // Check that the stored value cover all bits that are loaded.
  bool STCoversLD =
      (Offset >= 0) &&
      (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());

  // Helper that performs the actual replacement. For an indexed load it must
  // also materialize the pointer-update result (base +/- increment) since the
  // replacement value no longer comes from a load node.
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
                    LD->getAddressingMode() == ISD::POST_DEC);
      unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
      SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
                             LD->getOperand(1), LD->getOperand(2));
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
                                               STMemType.getSizeInBits()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extenstions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  // Note: do { ... continue; ... } while (false) is used as a structured
  // "goto cleanup" -- any 'continue' falls out of the loop to the cleanup
  // code below.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}
12869 
/// Main combine entry point for LOAD nodes: deletes dead loads, forwards
/// stored values, refines alignment, improves the chain, and attempts
/// pre/post-indexed and sliced forms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The loaded value is dead; the load can go if either the pointer
      // update is also dead or it can be split into plain arithmetic.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
12993 
12994 namespace {
12995 
12996 /// Helper structure used to slice a load in smaller loads.
12997 /// Basically a slice is obtained from the following sequence:
12998 /// Origin = load Ty1, Base
12999 /// Shift = srl Ty1 Origin, CstTy Amount
13000 /// Inst = trunc Shift to Ty2
13001 ///
13002 /// Then, it will be rewritten into:
13003 /// Slice = load SliceTy, Base + SliceOffset
13004 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13005 ///
13006 /// SliceTy is deduced from the number of bits that are actually used to
13007 /// build Inst.
13008 struct LoadedSlice {
13009   /// Helper structure used to compute the cost of a slice.
13010   struct Cost {
13011     /// Are we optimizing for code size.
13012     bool ForCodeSize;
13013 
13014     /// Various cost.
13015     unsigned Loads = 0;
13016     unsigned Truncates = 0;
13017     unsigned CrossRegisterBanksCopies = 0;
13018     unsigned ZExts = 0;
13019     unsigned Shift = 0;
13020 
13021     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13022 
13023     /// Get the cost of one isolated slice.
13024     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13025         : ForCodeSize(ForCodeSize), Loads(1) {
13026       EVT TruncType = LS.Inst->getValueType(0);
13027       EVT LoadedType = LS.getLoadedType();
13028       if (TruncType != LoadedType &&
13029           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13030         ZExts = 1;
13031     }
13032 
13033     /// Account for slicing gain in the current cost.
13034     /// Slicing provide a few gains like removing a shift or a
13035     /// truncate. This method allows to grow the cost of the original
13036     /// load with the gain from this slice.
13037     void addSliceGain(const LoadedSlice &LS) {
13038       // Each slice saves a truncate.
13039       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13040       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13041                               LS.Inst->getValueType(0)))
13042         ++Truncates;
13043       // If there is a shift amount, this slice gets rid of it.
13044       if (LS.Shift)
13045         ++Shift;
13046       // If this slice can merge a cross register bank copy, account for it.
13047       if (LS.canMergeExpensiveCrossRegisterBankCopy())
13048         ++CrossRegisterBanksCopies;
13049     }
13050 
13051     Cost &operator+=(const Cost &RHS) {
13052       Loads += RHS.Loads;
13053       Truncates += RHS.Truncates;
13054       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13055       ZExts += RHS.ZExts;
13056       Shift += RHS.Shift;
13057       return *this;
13058     }
13059 
13060     bool operator==(const Cost &RHS) const {
13061       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13062              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13063              ZExts == RHS.ZExts && Shift == RHS.Shift;
13064     }
13065 
13066     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13067 
13068     bool operator<(const Cost &RHS) const {
13069       // Assume cross register banks copies are as expensive as loads.
13070       // FIXME: Do we want some more target hooks?
13071       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13072       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13073       // Unless we are optimizing for code size, consider the
13074       // expensive operation first.
13075       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13076         return ExpensiveOpsLHS < ExpensiveOpsRHS;
13077       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13078              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13079     }
13080 
    /// Strict greater-than, expressed through operator<.
    bool operator>(const Cost &RHS) const { return RHS < *this; }
13082 
    /// Less-than-or-equal, expressed through operator<.
    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13084 
    /// Greater-than-or-equal, expressed through operator<.
    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13086   };
13087 
  // The last instruction that represents the slice; expected to be a
  // TRUNCATE node (see DAGCombiner::SliceUpLoad, which only builds slices
  // for trunc / trunc(lshr) users).
  SDNode *Inst;

  // The original (wide) load instruction being sliced.
  LoadSDNode *Origin;

  // The right shift amount, in bits, applied to the original loaded value
  // before the truncate. Zero when the slice starts at bit 0.
  unsigned Shift;

  // The DAG from which Origin comes.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  // All parameters default to null/zero so a LoadedSlice can be
  // default-constructed and filled in later; isLegal() rejects such an
  // incomplete slice.
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13105 
  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth wide and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    // Build an all-ones mask as wide as the slice...
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    // ...then widen it to the original type and place it at the slice's
    // bit offset within the original value.
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }
13125 
13126   /// Get the size of the slice to be loaded in bytes.
13127   unsigned getLoadedSize() const {
13128     unsigned SliceSize = getUsedBits().countPopulation();
13129     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13130     return SliceSize / 8;
13131   }
13132 
13133   /// Get the type that will be loaded for this slice.
13134   /// Note: This may not be the final type for the slice.
13135   EVT getLoadedType() const {
13136     assert(DAG && "Missing context");
13137     LLVMContext &Ctxt = *DAG->getContext();
13138     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13139   }
13140 
  /// Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    if (Offset != 0)
      // Base + Offset is only guaranteed to be aligned to the largest power
      // of two dividing both the base alignment and the offset; MinAlign of
      // Alignment and Alignment + Offset computes exactly that.
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }
13149 
  /// Check if this slice can be rewritten with legal operations.
  /// Slicing runs after DAG legalization (see DAGCombiner::SliceUpLoad), so
  /// every type and operation the rewrite would emit must already be legal.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    // loadSlice() emits a ZERO_EXTEND whenever the loaded type differs from
    // the type produced by the original truncate.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }
13193 
  /// Get the offset in bytes of this slice in the original chunk of
  /// bits, taking the target's endianness into account.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the least significant bits live at the highest
    // address, so the byte offset must be counted from the other end of the
    // original value.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }
13214 
  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// The new load reuses the original load's chain and memory-operand flags,
  /// with the pointer info shifted by the slice's byte offset.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }
13252 
13253   /// Check if this slice can be merged with an expensive cross register
13254   /// bank copy. E.g.,
13255   /// i = load i32
13256   /// f = bitcast i32 i to float
13257   bool canMergeExpensiveCrossRegisterBankCopy() const {
13258     if (!Inst || !Inst->hasOneUse())
13259       return false;
13260     SDNode *Use = *Inst->use_begin();
13261     if (Use->getOpcode() != ISD::BITCAST)
13262       return false;
13263     assert(DAG && "Missing context");
13264     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13265     EVT ResVT = Use->getValueType(0);
13266     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13267     const TargetRegisterClass *ArgRC =
13268         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13269     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13270       return false;
13271 
13272     // At this point, we know that we perform a cross-register-bank copy.
13273     // Check if it is expensive.
13274     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13275     // Assume bitcasts are cheap, unless both register classes do not
13276     // explicitly share a common sub class.
13277     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13278       return false;
13279 
13280     // Check if it will be merged with the load.
13281     // 1. Check the alignment constraint.
13282     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13283         ResVT.getTypeForEVT(*DAG->getContext()));
13284 
13285     if (RequiredAlignment > getAlignment())
13286       return false;
13287 
13288     // 2. Check that the load is a legal operation for that type.
13289     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13290       return false;
13291 
13292     // 3. Check that we do not have a zext in the way.
13293     if (Inst->getValueType(0) != getLoadedType())
13294       return false;
13295 
13296     return true;
13297   }
13298 };
13299 
13300 } // end anonymous namespace
13301 
13302 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13303 /// \p UsedBits looks like 0..0 1..1 0..0.
13304 static bool areUsedBitsDense(const APInt &UsedBits) {
13305   // If all the bits are one, this is dense!
13306   if (UsedBits.isAllOnesValue())
13307     return true;
13308 
13309   // Get rid of the unused bits on the right.
13310   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13311   // Get rid of the unused bits on the left.
13312   if (NarrowedUsedBits.countLeadingZeros())
13313     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13314   // Check that the chunk of bits is completely used.
13315   return NarrowedUsedBits.isAllOnesValue();
13316 }
13317 
13318 /// Check whether or not \p First and \p Second are next to each other
13319 /// in memory. This means that there is no hole between the bits loaded
13320 /// by \p First and the bits loaded by \p Second.
13321 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13322                                      const LoadedSlice &Second) {
13323   assert(First.Origin == Second.Origin && First.Origin &&
13324          "Unable to match different memory origins.");
13325   APInt UsedBits = First.getUsedBits();
13326   assert((UsedBits & Second.getUsedBits()) == 0 &&
13327          "Slices are not supposed to overlap.");
13328   UsedBits |= Second.getUsedBits();
13329   return areUsedBitsDense(UsedBits);
13330 }
13331 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Note: the loop increment slides the pairing window: the slice that was
  // "Second" on this iteration becomes "First" on the next one, unless it
  // was consumed by a pair (Second reset to nullptr below).
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    // A paired load effectively merges two of the sliced loads into one.
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
13391 
/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there is exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // In stress mode, slice whenever more than one slice exists, regardless
  // of cost, to exercise the slicing code paths.
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  // The original code has one big load.
  OrigCost.Loads = 1;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
    // Accumulate the cost of all the slices.
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
    GlobalSlicingCost += SliceCost;

    // Account as cost in the original configuration the gain obtained
    // with the current slices.
    OrigCost.addSliceGain(LS);
  }

  // If the target supports paired load, adjust the cost accordingly.
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  return OrigCost > GlobalSlicingCost;
}
13435 
/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
/// \return true if the load was replaced by a set of narrower loads.
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Slicing only happens once the DAG is fully legalized: the legality
  // checks in LoadedSlice::isLegal rely on types and operations being final.
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User must be a TRUNCATE: we only handle trunc and
    // trunc(lshr) users. Anything else disqualifies the whole load.
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may have wrapped the load in a zero extend; step through
    // it to reach the load whose chain result we need.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie the new loads' chains together with a TokenFactor and redirect the
  // chain users of the original load to it.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
13537 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// \return (0, 0) when the pattern does not match.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk)
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
13613 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
/// \param MaskInfo (number of masked bytes, byte shift) as returned by
///        CheckForMaskedLoad.
/// \return The new narrow store node, or nullptr if the rewrite is not
///         possible/legal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On big-endian targets the narrowed bytes sit at the far end of the
  // original value, so count the offset from the other side.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}
13673 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // Only handle the case where the op reads the load feeding this store and
  // the load immediately precedes the store on the chain.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the mask so the affected bits are the set ones, the
    // same convention OR and XOR naturally have.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lowest changed bit does not start at a NewBW-bit boundary,
    // start the narrowed window at the previous boundary instead.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed if the NewBW-bit window actually covers every bit the
    // immediate touches.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect the old load's chain users to the narrow load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
13799 
13800 /// For a given floating point load / store pair, if the load value isn't used
13801 /// by any other operations, then consider transforming the pair to integer
13802 /// load / store operations if the target deems the transformation profitable.
13803 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
13804   StoreSDNode *ST  = cast<StoreSDNode>(N);
13805   SDValue Chain = ST->getChain();
13806   SDValue Value = ST->getValue();
13807   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
13808       Value.hasOneUse() &&
13809       Chain == SDValue(Value.getNode(), 1)) {
13810     LoadSDNode *LD = cast<LoadSDNode>(Value);
13811     EVT VT = LD->getMemoryVT();
13812     if (!VT.isFloatingPoint() ||
13813         VT != ST->getMemoryVT() ||
13814         LD->isNonTemporal() ||
13815         ST->isNonTemporal() ||
13816         LD->getPointerInfo().getAddrSpace() != 0 ||
13817         ST->getPointerInfo().getAddrSpace() != 0)
13818       return SDValue();
13819 
13820     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
13821     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
13822         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
13823         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
13824         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
13825       return SDValue();
13826 
13827     unsigned LDAlign = LD->getAlignment();
13828     unsigned STAlign = ST->getAlignment();
13829     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
13830     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
13831     if (LDAlign < ABIAlign || STAlign < ABIAlign)
13832       return SDValue();
13833 
13834     SDValue NewLD =
13835         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
13836                     LD->getPointerInfo(), LDAlign);
13837 
13838     SDValue NewST =
13839         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
13840                      ST->getPointerInfo(), STAlign);
13841 
13842     AddToWorklist(NewLD.getNode());
13843     AddToWorklist(NewST.getNode());
13844     WorklistRemover DeadNodes(*this);
13845     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
13846     ++LdStFP2Int;
13847     return NewST;
13848   }
13849 
13850   return SDValue();
13851 }
13852 
13853 // This is a helper function for visitMUL to check the profitability
13854 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
13855 // MulNode is the original multiply, AddNode is (add x, c1),
13856 // and ConstNode is c2.
13857 //
13858 // If the (add x, c1) has multiple uses, we could increase
13859 // the number of adds if we make this transformation.
13860 // It would only be worth doing this if we can remove a
13861 // multiply in the process. Check for that here.
13862 // To illustrate:
13863 //     (A + c1) * c3
13864 //     (A + c2) * c3
13865 // We're checking for cases where we have common "c3 * A" expressions.
13866 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
13867                                               SDValue &AddNode,
13868                                               SDValue &ConstNode) {
13869   APInt Val;
13870 
13871   // If the add only has one use, this would be OK to do.
13872   if (AddNode.getNode()->hasOneUse())
13873     return true;
13874 
13875   // Walk all the users of the constant with which we're multiplying.
13876   for (SDNode *Use : ConstNode->uses()) {
13877     if (Use == MulNode) // This use is the one we're on right now. Skip it.
13878       continue;
13879 
13880     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
13881       SDNode *OtherOp;
13882       SDNode *MulVar = AddNode.getOperand(0).getNode();
13883 
13884       // OtherOp is what we're multiplying against the constant.
13885       if (Use->getOperand(0) == ConstNode)
13886         OtherOp = Use->getOperand(1).getNode();
13887       else
13888         OtherOp = Use->getOperand(0).getNode();
13889 
13890       // Check to see if multiply is with the same operand of our "add".
13891       //
13892       //     ConstNode  = CONST
13893       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
13894       //     ...
13895       //     AddNode  = (A + c1)  <-- MulVar is A.
13896       //         = AddNode * ConstNode   <-- current visiting instruction.
13897       //
13898       // If we make this transformation, we will have a common
13899       // multiply (ConstNode * A) that we can save.
13900       if (OtherOp == MulVar)
13901         return true;
13902 
13903       // Now check to see if a future expansion will give us a common
13904       // multiply.
13905       //
13906       //     ConstNode  = CONST
13907       //     AddNode    = (A + c1)
13908       //     ...   = AddNode * ConstNode <-- current visiting instruction.
13909       //     ...
13910       //     OtherOp = (A + c2)
13911       //     Use     = OtherOp * ConstNode <-- visiting Use.
13912       //
13913       // If we make this transformation, we will have a common
13914       // multiply (CONST * A) after we also do the same transformation
13915       // to the "t2" instruction.
13916       if (OtherOp->getOpcode() == ISD::ADD &&
13917           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
13918           OtherOp->getOperand(0).getNode() == MulVar)
13919         return true;
13920     }
13921   }
13922 
13923   // Didn't find a case where this would be profitable.
13924   return false;
13925 }
13926 
13927 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
13928                                          unsigned NumStores) {
13929   SmallVector<SDValue, 8> Chains;
13930   SmallPtrSet<const SDNode *, 8> Visited;
13931   SDLoc StoreDL(StoreNodes[0].MemNode);
13932 
13933   for (unsigned i = 0; i < NumStores; ++i) {
13934     Visited.insert(StoreNodes[i].MemNode);
13935   }
13936 
13937   // don't include nodes that are children
13938   for (unsigned i = 0; i < NumStores; ++i) {
13939     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
13940       Chains.push_back(StoreNodes[i].MemNode->getChain());
13941   }
13942 
13943   assert(Chains.size() > 0 && "Chain should have generated a chain");
13944   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
13945 }
13946 
// Merge the first NumStores entries of StoreNodes (already sorted by offset)
// into one wide store at the first store's address. The stored values are
// either all constants (IsConstantSrc) or all vector extracts. UseVector
// selects a vector-typed merged store over a wide integer store; UseTrunc
// requests a truncating store of the type-legalized value. Returns true if
// the merge was performed.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // Pick the merged store type: a wider vector of the element's scalar type,
  // or a single integer covering all the stored bits.
  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the per-store constants and build them into one vector value.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Bitcast the correctly-sized value to the element's memory type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      // If each element is itself a vector, concatenate; otherwise build a
      // vector out of the scalar elements.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      // Source values are vector extracts; collect them as operands of the
      // merged vector value.
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            // Same scalar type: re-extract from the source vector with the
            // extract opcode that yields MemVT directly.
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian, visit stores from highest offset to lowest so that
      // after the shift-then-or loop the first (lowest-address) store's bits
      // end up in the low bits of StoreInt.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Legalization promotes the constant to a wider integer type; store it
    // back truncated to the original merged width.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
14103 
// Populate StoreNodes with stores that are candidates to merge with St
// (same base pointer, compatible stored-value kind and memory type), and set
// RootNode to the common chain ancestor the search was rooted at. Offsets in
// the returned MemOpLinks are relative to St's base.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcasts(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify St's value; candidates must be of the same kind.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile.
    if (Ld->isVolatile() || Ld->isIndexed())
      return;
  }
  // Decide whether Other can merge with St; on success fill in Ptr with
  // Other's decomposed address and Offset with its byte distance from BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile.
        if (OtherLd->isVolatile() || OtherLd->isIndexed())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We are looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // St is chained through a load: root at the load's chain and look for
    // sibling loads, then for stores hanging off each sibling load's chain.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise examine every store chained directly to the root.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
14229 
// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates. Returns true if the merge is safe.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                    in candidate selection and can be
    //                    safely ignored
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles possible (e.g. via indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  // If any candidate is a predecessor of another candidate's non-chain
  // operand, merging would create a cycle, so report failure.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max))
      return false;
  return true;
}
14287 
14288 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14289   if (OptLevel == CodeGenOpt::None)
14290     return false;
14291 
14292   EVT MemVT = St->getMemoryVT();
14293   int64_t ElementSizeBytes = MemVT.getStoreSize();
14294   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14295 
14296   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14297     return false;
14298 
14299   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14300       Attribute::NoImplicitFloat);
14301 
14302   // This function cannot currently deal with non-byte-sized memory sizes.
14303   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14304     return false;
14305 
14306   if (!MemVT.isSimple())
14307     return false;
14308 
14309   // Perform an early exit check. Do not bother looking at stored values that
14310   // are not constants, loads, or extracted vector elements.
14311   SDValue StoredVal = peekThroughBitcasts(St->getValue());
14312   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14313   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14314                        isa<ConstantFPSDNode>(StoredVal);
14315   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14316                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14317 
14318   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14319     return false;
14320 
14321   SmallVector<MemOpLink, 8> StoreNodes;
14322   SDNode *RootNode;
14323   // Find potential store merge candidates by searching through chain sub-DAG
14324   getStoreMergeCandidates(St, StoreNodes, RootNode);
14325 
14326   // Check if there is anything to merge.
14327   if (StoreNodes.size() < 2)
14328     return false;
14329 
14330   // Sort the memory operands according to their distance from the
14331   // base pointer.
14332   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14333     return LHS.OffsetFromBase < RHS.OffsetFromBase;
14334   });
14335 
14336   // Store Merge attempts to merge the lowest stores. This generally
14337   // works out as if successful, as the remaining stores are checked
14338   // after the first collection of stores is merged. However, in the
14339   // case that a non-mergeable store is found first, e.g., {p[-2],
14340   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14341   // mergeable cases. To prevent this, we prune such stores from the
14342   // front of StoreNodes here.
14343 
14344   bool RV = false;
14345   while (StoreNodes.size() > 1) {
14346     unsigned StartIdx = 0;
14347     while ((StartIdx + 1 < StoreNodes.size()) &&
14348            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14349                StoreNodes[StartIdx + 1].OffsetFromBase)
14350       ++StartIdx;
14351 
14352     // Bail if we don't have enough candidates to merge.
14353     if (StartIdx + 1 >= StoreNodes.size())
14354       return RV;
14355 
14356     if (StartIdx)
14357       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14358 
14359     // Scan the memory operations on the chain and find the first
14360     // non-consecutive store memory address.
14361     unsigned NumConsecutiveStores = 1;
14362     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14363     // Check that the addresses are consecutive starting from the second
14364     // element in the list of stores.
14365     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14366       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14367       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14368         break;
14369       NumConsecutiveStores = i + 1;
14370     }
14371 
14372     if (NumConsecutiveStores < 2) {
14373       StoreNodes.erase(StoreNodes.begin(),
14374                        StoreNodes.begin() + NumConsecutiveStores);
14375       continue;
14376     }
14377 
14378     // The node with the lowest store address.
14379     LLVMContext &Context = *DAG.getContext();
14380     const DataLayout &DL = DAG.getDataLayout();
14381 
14382     // Store the constants into memory as one consecutive store.
14383     if (IsConstantSrc) {
14384       while (NumConsecutiveStores >= 2) {
14385         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14386         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14387         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14388         unsigned LastLegalType = 1;
14389         unsigned LastLegalVectorType = 1;
14390         bool LastIntegerTrunc = false;
14391         bool NonZero = false;
14392         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14393         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14394           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14395           SDValue StoredVal = ST->getValue();
14396           bool IsElementZero = false;
14397           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14398             IsElementZero = C->isNullValue();
14399           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14400             IsElementZero = C->getConstantFPValue()->isNullValue();
14401           if (IsElementZero) {
14402             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14403               FirstZeroAfterNonZero = i;
14404           }
14405           NonZero |= !IsElementZero;
14406 
14407           // Find a legal type for the constant store.
14408           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14409           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14410           bool IsFast = false;
14411 
14412           // Break early when size is too large to be legal.
14413           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14414             break;
14415 
14416           if (TLI.isTypeLegal(StoreTy) &&
14417               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14418               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14419                                      FirstStoreAlign, &IsFast) &&
14420               IsFast) {
14421             LastIntegerTrunc = false;
14422             LastLegalType = i + 1;
14423             // Or check whether a truncstore is legal.
14424           } else if (TLI.getTypeAction(Context, StoreTy) ==
14425                      TargetLowering::TypePromoteInteger) {
14426             EVT LegalizedStoredValTy =
14427                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14428             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14429                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14430                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14431                                        FirstStoreAlign, &IsFast) &&
14432                 IsFast) {
14433               LastIntegerTrunc = true;
14434               LastLegalType = i + 1;
14435             }
14436           }
14437 
14438           // We only use vectors if the constant is known to be zero or the
14439           // target allows it and the function is not marked with the
14440           // noimplicitfloat attribute.
14441           if ((!NonZero ||
14442                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14443               !NoVectors) {
14444             // Find a legal type for the vector store.
14445             unsigned Elts = (i + 1) * NumMemElts;
14446             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14447             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14448                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14449                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14450                                        FirstStoreAlign, &IsFast) &&
14451                 IsFast)
14452               LastLegalVectorType = i + 1;
14453           }
14454         }
14455 
14456         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14457         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14458 
14459         // Check if we found a legal integer type that creates a meaningful
14460         // merge.
14461         if (NumElem < 2) {
14462           // We know that candidate stores are in order and of correct
14463           // shape. While there is no mergeable sequence from the
14464           // beginning one may start later in the sequence. The only
14465           // reason a merge of size N could have failed where another of
14466           // the same size would not have, is if the alignment has
14467           // improved or we've dropped a non-zero value. Drop as many
14468           // candidates as we can here.
14469           unsigned NumSkip = 1;
14470           while (
14471               (NumSkip < NumConsecutiveStores) &&
14472               (NumSkip < FirstZeroAfterNonZero) &&
14473               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14474             NumSkip++;
14475 
14476           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14477           NumConsecutiveStores -= NumSkip;
14478           continue;
14479         }
14480 
14481         // Check that we can merge these candidates without causing a cycle.
14482         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14483                                                       RootNode)) {
14484           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14485           NumConsecutiveStores -= NumElem;
14486           continue;
14487         }
14488 
14489         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14490                                               UseVector, LastIntegerTrunc);
14491 
14492         // Remove merged stores for next iteration.
14493         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14494         NumConsecutiveStores -= NumElem;
14495       }
14496       continue;
14497     }
14498 
14499     // When extracting multiple vector elements, try to store them
14500     // in one vector store rather than a sequence of scalar stores.
14501     if (IsExtractVecSrc) {
14502       // Loop on Consecutive Stores on success.
14503       while (NumConsecutiveStores >= 2) {
14504         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14505         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14506         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14507         unsigned NumStoresToMerge = 1;
14508         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14509           // Find a legal type for the vector store.
14510           unsigned Elts = (i + 1) * NumMemElts;
14511           EVT Ty =
14512               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14513           bool IsFast;
14514 
14515           // Break early when size is too large to be legal.
14516           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14517             break;
14518 
14519           if (TLI.isTypeLegal(Ty) &&
14520               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14521               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14522                                      FirstStoreAlign, &IsFast) &&
14523               IsFast)
14524             NumStoresToMerge = i + 1;
14525         }
14526 
14527         // Check if we found a legal integer type creating a meaningful
14528         // merge.
14529         if (NumStoresToMerge < 2) {
14530           // We know that candidate stores are in order and of correct
14531           // shape. While there is no mergeable sequence from the
14532           // beginning one may start later in the sequence. The only
14533           // reason a merge of size N could have failed where another of
14534           // the same size would not have, is if the alignment has
14535           // improved. Drop as many candidates as we can here.
14536           unsigned NumSkip = 1;
14537           while (
14538               (NumSkip < NumConsecutiveStores) &&
14539               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14540             NumSkip++;
14541 
14542           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14543           NumConsecutiveStores -= NumSkip;
14544           continue;
14545         }
14546 
14547         // Check that we can merge these candidates without causing a cycle.
14548         if (!checkMergeStoreCandidatesForDependencies(
14549                 StoreNodes, NumStoresToMerge, RootNode)) {
14550           StoreNodes.erase(StoreNodes.begin(),
14551                            StoreNodes.begin() + NumStoresToMerge);
14552           NumConsecutiveStores -= NumStoresToMerge;
14553           continue;
14554         }
14555 
14556         RV |= MergeStoresOfConstantsOrVecElts(
14557             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14558 
14559         StoreNodes.erase(StoreNodes.begin(),
14560                          StoreNodes.begin() + NumStoresToMerge);
14561         NumConsecutiveStores -= NumStoresToMerge;
14562       }
14563       continue;
14564     }
14565 
14566     // Below we handle the case of multiple consecutive stores that
14567     // come from multiple consecutive loads. We merge them into a single
14568     // wide load and a single wide store.
14569 
14570     // Look for load nodes which are used by the stored values.
14571     SmallVector<MemOpLink, 8> LoadNodes;
14572 
14573     // Find acceptable loads. Loads need to have the same chain (token factor),
14574     // must not be zext, volatile, indexed, and they must be consecutive.
14575     BaseIndexOffset LdBasePtr;
14576 
14577     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14578       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14579       SDValue Val = peekThroughBitcasts(St->getValue());
14580       LoadSDNode *Ld = cast<LoadSDNode>(Val);
14581 
14582       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14583       // If this is not the first ptr that we check.
14584       int64_t LdOffset = 0;
14585       if (LdBasePtr.getBase().getNode()) {
14586         // The base ptr must be the same.
14587         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14588           break;
14589       } else {
14590         // Check that all other base pointers are the same as this one.
14591         LdBasePtr = LdPtr;
14592       }
14593 
14594       // We found a potential memory operand to merge.
14595       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14596     }
14597 
14598     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14599       // If we have load/store pair instructions and we only have two values,
14600       // don't bother merging.
14601       unsigned RequiredAlignment;
14602       if (LoadNodes.size() == 2 &&
14603           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14604           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14605         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14606         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14607         break;
14608       }
14609       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14610       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14611       unsigned FirstStoreAlign = FirstInChain->getAlignment();
14612       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
14613       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
14614       unsigned FirstLoadAlign = FirstLoad->getAlignment();
14615 
14616       // Scan the memory operations on the chain and find the first
14617       // non-consecutive load memory address. These variables hold the index in
14618       // the store node array.
14619 
14620       unsigned LastConsecutiveLoad = 1;
14621 
14622       // This variable refers to the size and not index in the array.
14623       unsigned LastLegalVectorType = 1;
14624       unsigned LastLegalIntegerType = 1;
14625       bool isDereferenceable = true;
14626       bool DoIntegerTruncate = false;
14627       StartAddress = LoadNodes[0].OffsetFromBase;
14628       SDValue FirstChain = FirstLoad->getChain();
14629       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
14630         // All loads must share the same chain.
14631         if (LoadNodes[i].MemNode->getChain() != FirstChain)
14632           break;
14633 
14634         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
14635         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14636           break;
14637         LastConsecutiveLoad = i;
14638 
14639         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
14640           isDereferenceable = false;
14641 
14642         // Find a legal type for the vector store.
14643         unsigned Elts = (i + 1) * NumMemElts;
14644         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14645 
14646         // Break early when size is too large to be legal.
14647         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14648           break;
14649 
14650         bool IsFastSt, IsFastLd;
14651         if (TLI.isTypeLegal(StoreTy) &&
14652             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14653             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14654                                    FirstStoreAlign, &IsFastSt) &&
14655             IsFastSt &&
14656             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14657                                    FirstLoadAlign, &IsFastLd) &&
14658             IsFastLd) {
14659           LastLegalVectorType = i + 1;
14660         }
14661 
14662         // Find a legal type for the integer store.
14663         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14664         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14665         if (TLI.isTypeLegal(StoreTy) &&
14666             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14667             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14668                                    FirstStoreAlign, &IsFastSt) &&
14669             IsFastSt &&
14670             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14671                                    FirstLoadAlign, &IsFastLd) &&
14672             IsFastLd) {
14673           LastLegalIntegerType = i + 1;
14674           DoIntegerTruncate = false;
14675           // Or check whether a truncstore and extload is legal.
14676         } else if (TLI.getTypeAction(Context, StoreTy) ==
14677                    TargetLowering::TypePromoteInteger) {
14678           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14679           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14680               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14681               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14682                                  StoreTy) &&
14683               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14684                                  StoreTy) &&
14685               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14686               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14687                                      FirstStoreAlign, &IsFastSt) &&
14688               IsFastSt &&
14689               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14690                                      FirstLoadAlign, &IsFastLd) &&
14691               IsFastLd) {
14692             LastLegalIntegerType = i + 1;
14693             DoIntegerTruncate = true;
14694           }
14695         }
14696       }
14697 
14698       // Only use vector types if the vector type is larger than the integer
14699       // type. If they are the same, use integers.
14700       bool UseVectorTy =
14701           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14702       unsigned LastLegalType =
14703           std::max(LastLegalVectorType, LastLegalIntegerType);
14704 
14705       // We add +1 here because the LastXXX variables refer to location while
14706       // the NumElem refers to array/index size.
14707       unsigned NumElem =
14708           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14709       NumElem = std::min(LastLegalType, NumElem);
14710 
14711       if (NumElem < 2) {
14712         // We know that candidate stores are in order and of correct
14713         // shape. While there is no mergeable sequence from the
14714         // beginning one may start later in the sequence. The only
14715         // reason a merge of size N could have failed where another of
14716         // the same size would not have is if the alignment or either
14717         // the load or store has improved. Drop as many candidates as we
14718         // can here.
14719         unsigned NumSkip = 1;
14720         while ((NumSkip < LoadNodes.size()) &&
14721                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14722                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14723           NumSkip++;
14724         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14725         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14726         NumConsecutiveStores -= NumSkip;
14727         continue;
14728       }
14729 
14730       // Check that we can merge these candidates without causing a cycle.
14731       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14732                                                     RootNode)) {
14733         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14734         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14735         NumConsecutiveStores -= NumElem;
14736         continue;
14737       }
14738 
14739       // Find if it is better to use vectors or integers to load and store
14740       // to memory.
14741       EVT JointMemOpVT;
14742       if (UseVectorTy) {
14743         // Find a legal type for the vector store.
14744         unsigned Elts = NumElem * NumMemElts;
14745         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14746       } else {
14747         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14748         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14749       }
14750 
14751       SDLoc LoadDL(LoadNodes[0].MemNode);
14752       SDLoc StoreDL(StoreNodes[0].MemNode);
14753 
14754       // The merged loads are required to have the same incoming chain, so
14755       // using the first's chain is acceptable.
14756 
14757       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14758       AddToWorklist(NewStoreChain.getNode());
14759 
14760       MachineMemOperand::Flags MMOFlags =
14761           isDereferenceable ? MachineMemOperand::MODereferenceable
14762                             : MachineMemOperand::MONone;
14763 
14764       SDValue NewLoad, NewStore;
14765       if (UseVectorTy || !DoIntegerTruncate) {
14766         NewLoad =
14767             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
14768                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14769                         FirstLoadAlign, MMOFlags);
14770         NewStore = DAG.getStore(
14771             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
14772             FirstInChain->getPointerInfo(), FirstStoreAlign);
14773       } else { // This must be the truncstore/extload case
14774         EVT ExtendedTy =
14775             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
14776         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
14777                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
14778                                  FirstLoad->getPointerInfo(), JointMemOpVT,
14779                                  FirstLoadAlign, MMOFlags);
14780         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
14781                                      FirstInChain->getBasePtr(),
14782                                      FirstInChain->getPointerInfo(),
14783                                      JointMemOpVT, FirstInChain->getAlignment(),
14784                                      FirstInChain->getMemOperand()->getFlags());
14785       }
14786 
14787       // Transfer chain users from old loads to the new load.
14788       for (unsigned i = 0; i < NumElem; ++i) {
14789         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
14790         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
14791                                       SDValue(NewLoad.getNode(), 1));
14792       }
14793 
14794       // Replace the all stores with the new store. Recursively remove
14795       // corresponding value if its no longer used.
14796       for (unsigned i = 0; i < NumElem; ++i) {
14797         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
14798         CombineTo(StoreNodes[i].MemNode, NewStore);
14799         if (Val.getNode()->use_empty())
14800           recursivelyDeleteUnusedNodes(Val.getNode());
14801       }
14802 
14803       RV = true;
14804       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14805       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14806       NumConsecutiveStores -= NumElem;
14807     }
14808   }
14809   return RV;
14810 }
14811 
14812 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
14813   SDLoc SL(ST);
14814   SDValue ReplStore;
14815 
14816   // Replace the chain to avoid dependency.
14817   if (ST->isTruncatingStore()) {
14818     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
14819                                   ST->getBasePtr(), ST->getMemoryVT(),
14820                                   ST->getMemOperand());
14821   } else {
14822     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
14823                              ST->getMemOperand());
14824   }
14825 
14826   // Create token to keep both nodes around.
14827   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
14828                               MVT::Other, ST->getChain(), ReplStore);
14829 
14830   // Make sure the new and old chains are cleaned up.
14831   AddToWorklist(Token.getNode());
14832 
14833   // Don't add users to work list.
14834   return CombineTo(ST, Token, false);
14835 }
14836 
14837 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
14838   SDValue Value = ST->getValue();
14839   if (Value.getOpcode() == ISD::TargetConstantFP)
14840     return SDValue();
14841 
14842   SDLoc DL(ST);
14843 
14844   SDValue Chain = ST->getChain();
14845   SDValue Ptr = ST->getBasePtr();
14846 
14847   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
14848 
14849   // NOTE: If the original store is volatile, this transform must not increase
14850   // the number of stores.  For example, on x86-32 an f64 can be stored in one
14851   // processor operation but an i64 (which is not legal) requires two.  So the
14852   // transform should not be done in this case.
14853 
14854   SDValue Tmp;
14855   switch (CFP->getSimpleValueType(0).SimpleTy) {
14856   default:
14857     llvm_unreachable("Unknown FP type");
14858   case MVT::f16:    // We don't do this for these yet.
14859   case MVT::f80:
14860   case MVT::f128:
14861   case MVT::ppcf128:
14862     return SDValue();
14863   case MVT::f32:
14864     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
14865         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14866       ;
14867       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
14868                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
14869                             MVT::i32);
14870       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
14871     }
14872 
14873     return SDValue();
14874   case MVT::f64:
14875     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
14876          !ST->isVolatile()) ||
14877         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
14878       ;
14879       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
14880                             getZExtValue(), SDLoc(CFP), MVT::i64);
14881       return DAG.getStore(Chain, DL, Tmp,
14882                           Ptr, ST->getMemOperand());
14883     }
14884 
14885     if (!ST->isVolatile() &&
14886         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14887       // Many FP stores are not made apparent until after legalize, e.g. for
14888       // argument passing.  Since this is so common, custom legalize the
14889       // 64-bit integer store into two 32-bit stores.
14890       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
14891       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
14892       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
14893       if (DAG.getDataLayout().isBigEndian())
14894         std::swap(Lo, Hi);
14895 
14896       unsigned Alignment = ST->getAlignment();
14897       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14898       AAMDNodes AAInfo = ST->getAAInfo();
14899 
14900       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14901                                  ST->getAlignment(), MMOFlags, AAInfo);
14902       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14903                         DAG.getConstant(4, DL, Ptr.getValueType()));
14904       Alignment = MinAlign(Alignment, 4U);
14905       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
14906                                  ST->getPointerInfo().getWithOffset(4),
14907                                  Alignment, MMOFlags, AAInfo);
14908       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14909                          St0, St1);
14910     }
14911 
14912     return SDValue();
14913   }
14914 }
14915 
/// Main combine entry point for STORE nodes.  Tries, in order: store of a
/// bitcast -> store of the source, dead-store elimination, alignment
/// inference, FP/int load-store pairing, chain improvement, truncstore
/// narrowing, consecutive-store merging, indexed-store formation, FP-constant
/// replacement, merged-value splitting, and store-width reduction.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      // Only do this when the target says the access at the original
      // alignment is fast for the new type.
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // findBetterNeighborChains may have rewritten the chain in place.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If this store fully overwrites its immediately preceding store to the
      // same location, and no other node is chained to that preceding store,
      // we can effectively drop the earlier store. Do not remove stores to
      // undef as they may be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Try splitting an OR-of-zexts value into two narrower stores.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  // As a last resort, try narrowing the store width.
  return ReduceLoadOpStoreWidth(N);
}
15098 
15099 /// For the instruction sequence of store below, F and I values
15100 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
15102 /// which can remove the bitwise instructions or sink them to colder places.
15103 ///
15104 ///   (store (or (zext (bitcast F to i32) to i64),
15105 ///              (shl (zext I to i64), 32)), addr)  -->
15106 ///   (store F, addr) and (store I, addr+4)
15107 ///
15108 /// Similarly, splitting for other merged store can also be beneficial, like:
15109 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15110 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15111 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15112 /// For pair of {i16, i8},  i32 store --> two i16 stores.
15113 /// For pair of {i8, i8},   i16 store --> two i8 stores.
15114 ///
15115 /// We allow each target to determine specifically which kind of splitting is
15116 /// supported.
15117 ///
15118 /// The store patterns are commonly seen from the simple code snippet below
15119 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
15120 ///   void goo(const std::pair<int, float> &);
15121 ///   hoo() {
15122 ///     ...
15123 ///     goo(std::make_pair(tmp, ftmp));
15124 ///     ...
15125 ///   }
15126 ///
15127 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15128   if (OptLevel == CodeGenOpt::None)
15129     return SDValue();
15130 
15131   SDValue Val = ST->getValue();
15132   SDLoc DL(ST);
15133 
15134   // Match OR operand.
15135   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15136     return SDValue();
15137 
15138   // Match SHL operand and get Lower and Higher parts of Val.
15139   SDValue Op1 = Val.getOperand(0);
15140   SDValue Op2 = Val.getOperand(1);
15141   SDValue Lo, Hi;
15142   if (Op1.getOpcode() != ISD::SHL) {
15143     std::swap(Op1, Op2);
15144     if (Op1.getOpcode() != ISD::SHL)
15145       return SDValue();
15146   }
15147   Lo = Op2;
15148   Hi = Op1.getOperand(0);
15149   if (!Op1.hasOneUse())
15150     return SDValue();
15151 
15152   // Match shift amount to HalfValBitSize.
15153   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15154   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15155   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15156     return SDValue();
15157 
15158   // Lo and Hi are zero-extended from int with size less equal than 32
15159   // to i64.
15160   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15161       !Lo.getOperand(0).getValueType().isScalarInteger() ||
15162       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15163       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15164       !Hi.getOperand(0).getValueType().isScalarInteger() ||
15165       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15166     return SDValue();
15167 
15168   // Use the EVT of low and high parts before bitcast as the input
15169   // of target query.
15170   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15171                   ? Lo.getOperand(0).getValueType()
15172                   : Lo.getValueType();
15173   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15174                    ? Hi.getOperand(0).getValueType()
15175                    : Hi.getValueType();
15176   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15177     return SDValue();
15178 
15179   // Start to split store.
15180   unsigned Alignment = ST->getAlignment();
15181   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15182   AAMDNodes AAInfo = ST->getAAInfo();
15183 
15184   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15185   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15186   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15187   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15188 
15189   SDValue Chain = ST->getChain();
15190   SDValue Ptr = ST->getBasePtr();
15191   // Lower value store.
15192   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15193                              ST->getAlignment(), MMOFlags, AAInfo);
15194   Ptr =
15195       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15196                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15197   // Higher value store.
15198   SDValue St1 =
15199       DAG.getStore(St0, DL, Hi, Ptr,
15200                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15201                    Alignment / 2, MMOFlags, AAInfo);
15202   return St1;
15203 }
15204 
15205 /// Convert a disguised subvector insertion into a shuffle:
15206 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15207 /// bitcast(shuffle (bitcast V), (extended X), Mask)
15208 /// Note: We do not use an insert_subvector node because that requires a legal
15209 /// subvector type.
15210 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15211   SDValue InsertVal = N->getOperand(1);
15212   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15213       !InsertVal.getOperand(0).getValueType().isVector())
15214     return SDValue();
15215 
15216   SDValue SubVec = InsertVal.getOperand(0);
15217   SDValue DestVec = N->getOperand(0);
15218   EVT SubVecVT = SubVec.getValueType();
15219   EVT VT = DestVec.getValueType();
15220   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15221   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15222   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15223 
15224   // Step 1: Create a shuffle mask that implements this insert operation. The
15225   // vector that we are inserting into will be operand 0 of the shuffle, so
15226   // those elements are just 'i'. The inserted subvector is in the first
15227   // positions of operand 1 of the shuffle. Example:
15228   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
15229   SmallVector<int, 16> Mask(NumMaskVals);
15230   for (unsigned i = 0; i != NumMaskVals; ++i) {
15231     if (i / NumSrcElts == InsIndex)
15232       Mask[i] = (i % NumSrcElts) + NumMaskVals;
15233     else
15234       Mask[i] = i;
15235   }
15236 
15237   // Bail out if the target can not handle the shuffle we want to create.
15238   EVT SubVecEltVT = SubVecVT.getVectorElementType();
15239   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15240   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15241     return SDValue();
15242 
15243   // Step 2: Create a wide vector from the inserted source vector by appending
15244   // undefined elements. This is the same size as our destination vector.
15245   SDLoc DL(N);
15246   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15247   ConcatOps[0] = SubVec;
15248   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15249 
15250   // Step 3: Shuffle in the padded subvector.
15251   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15252   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15253   AddToWorklist(PaddedSubV.getNode());
15254   AddToWorklist(DestVecBC.getNode());
15255   AddToWorklist(Shuf.getNode());
15256   return DAG.getBitcast(VT, Shuf);
15257 }
15258 
/// Combine/canonicalize an INSERT_VECTOR_ELT node. Returns a replacement
/// value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.isUndef())
    return InVec;

  EVT VT = InVec.getValueType();

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC) {
    // If this is variable insert to undef vector, it might be better to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
    if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
      SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
      return DAG.getBuildVector(VT, DL, Ops);
    }
    // No other fold here handles a variable insertion index.
    return SDValue();
  }

  // We must know which element is being inserted for folds below here.
  unsigned Elt = IndexC->getZExtValue();
  // Try turning an insertion of a bitcast vector value into a shuffle.
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}
15344 
/// Replace (extract_vector_elt (load $addr), idx) with a scalar load of just
/// the extracted element. The original load must be non-volatile, and the
/// replacement assumes the extract is the only user of the load (see the
/// note before the RAUW below). On success the nodes are replaced in place
/// and SDValue(EVE, 0) is returned to signal that a combine happened.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail out if the element load would need more alignment than the original
  // load provides, or if scalar loads of this type aren't legal/custom.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  // The narrow load only claims the element type's ABI alignment.
  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: byte offset is known, so the pointer info can carry it.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: compute idx * storesize at runtime; keep the original
    // (offset-less) pointer info.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-or-narrower result: plain load, then truncate or bitcast to the
    // extract's result type as needed.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extract's value and the original load's chain result.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
15427 
/// Combine an EXTRACT_VECTOR_ELT node: fold away extracts whose value is
/// visible in the DAG (scalar_to_vector, build_vector, insert_vector_elt,
/// shuffles, bitcasts), simplify the source vector based on demanded
/// elements, and finally try to narrow a vector load to a scalar load of
/// the single extracted element.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  // (vextract (scalar_to_vector val), 0) -> val
  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt of out-of-bounds element -> UNDEF
  if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
    return DAG.getUNDEF(NVT);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // TODO: These transforms should not require the 'hasOneUse' restriction, but
  // there are regressions on multiple targets without it. We can end up with a
  // mess of scalar and vector code if we reduce only part of the DAG to scalar.
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && VT.isInteger() &&
      InVec.hasOneUse()) {
    // The vector index of the LSBs of the source depend on the endian-ness.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    unsigned ExtractIndex = ConstEltNo->getZExtValue();
    // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
    unsigned BCTruncElt = IsLE ? 0 : VT.getVectorNumElements() - 1;
    SDValue BCSrc = InVec.getOperand(0);
    if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);

    if (LegalTypes && BCSrc.getValueType().isInteger() &&
        BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
      // trunc i64 X to i32
      SDValue X = BCSrc.getOperand(0);
      assert(X.getValueType().isScalarInteger() && NVT.isScalarInteger() &&
             "Extract element and scalar to vector can't change element type "
             "from FP to integer.");
      unsigned XBitWidth = X.getValueSizeInBits();
      unsigned VecEltBitWidth = VT.getScalarSizeInBits();
      // Recompute the LSB element index relative to the scalar source width.
      BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;

      // An extract element return value type can be wider than its vector
      // operand element type. In that case, the high bits are undefined, so
      // it's possible that we may need to extend rather than truncate.
      if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
        assert(XBitWidth % VecEltBitWidth == 0 &&
               "Scalar bitwidth must be a multiple of vector element bitwidth");
        return DAG.getAnyExtOrTrunc(X, SDLoc(N), NVT);
      }
    }
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
  // simplify it based on the (valid) extraction indices.
  if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Use->getOperand(0) == InVec &&
               isa<ConstantSDNode>(Use->getOperand(1));
      })) {
    APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
    for (SDNode *Use : InVec->uses()) {
      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
      if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
        DemandedElts.setBit(CstElt->getZExtValue());
    }
    if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
      return SDValue(N, 0);
  }

  // Everything below tries to narrow a vector load feeding this extract.
  // First look through a bitcast of the source vector, tracking whether it
  // changed the element count (which would invalidate shuffle mask indices
  // examined later).
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // Variable-index case: only before legalization, and only when the index
  // computation can't reach back to the load (which would create a cycle).
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Find the load to narrow, possibly through scalar_to_vector or a
    // vector shuffle.
    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the extract index onto the selected shuffle input.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
15690 
15691 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
15692 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
15693   // We perform this optimization post type-legalization because
15694   // the type-legalizer often scalarizes integer-promoted vectors.
15695   // Performing this optimization before may create bit-casts which
15696   // will be type-legalized to complex code sequences.
15697   // We perform this optimization only before the operation legalizer because we
15698   // may introduce illegal operations.
15699   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
15700     return SDValue();
15701 
15702   unsigned NumInScalars = N->getNumOperands();
15703   SDLoc DL(N);
15704   EVT VT = N->getValueType(0);
15705 
15706   // Check to see if this is a BUILD_VECTOR of a bunch of values
15707   // which come from any_extend or zero_extend nodes. If so, we can create
15708   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
15709   // optimizations. We do not handle sign-extend because we can't fill the sign
15710   // using shuffles.
15711   EVT SourceType = MVT::Other;
15712   bool AllAnyExt = true;
15713 
15714   for (unsigned i = 0; i != NumInScalars; ++i) {
15715     SDValue In = N->getOperand(i);
15716     // Ignore undef inputs.
15717     if (In.isUndef()) continue;
15718 
15719     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
15720     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15721 
15722     // Abort if the element is not an extension.
15723     if (!ZeroExt && !AnyExt) {
15724       SourceType = MVT::Other;
15725       break;
15726     }
15727 
15728     // The input is a ZeroExt or AnyExt. Check the original type.
15729     EVT InTy = In.getOperand(0).getValueType();
15730 
15731     // Check that all of the widened source types are the same.
15732     if (SourceType == MVT::Other)
15733       // First time.
15734       SourceType = InTy;
15735     else if (InTy != SourceType) {
15736       // Multiple income types. Abort.
15737       SourceType = MVT::Other;
15738       break;
15739     }
15740 
15741     // Check if all of the extends are ANY_EXTENDs.
15742     AllAnyExt &= AnyExt;
15743   }
15744 
15745   // In order to have valid types, all of the inputs must be extended from the
15746   // same source type and all of the inputs must be any or zero extend.
15747   // Scalar sizes must be a power of two.
15748   EVT OutScalarTy = VT.getScalarType();
15749   bool ValidTypes = SourceType != MVT::Other &&
15750                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
15751                  isPowerOf2_32(SourceType.getSizeInBits());
15752 
15753   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
15754   // turn into a single shuffle instruction.
15755   if (!ValidTypes)
15756     return SDValue();
15757 
15758   bool isLE = DAG.getDataLayout().isLittleEndian();
15759   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
15760   assert(ElemRatio > 1 && "Invalid element size ratio");
15761   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
15762                                DAG.getConstant(0, DL, SourceType);
15763 
15764   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
15765   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
15766 
15767   // Populate the new build_vector
15768   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15769     SDValue Cast = N->getOperand(i);
15770     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
15771             Cast.getOpcode() == ISD::ZERO_EXTEND ||
15772             Cast.isUndef()) && "Invalid cast opcode");
15773     SDValue In;
15774     if (Cast.isUndef())
15775       In = DAG.getUNDEF(SourceType);
15776     else
15777       In = Cast->getOperand(0);
15778     unsigned Index = isLE ? (i * ElemRatio) :
15779                             (i * ElemRatio + (ElemRatio - 1));
15780 
15781     assert(Index < Ops.size() && "Invalid index");
15782     Ops[Index] = In;
15783   }
15784 
15785   // The type of the new BUILD_VECTOR node.
15786   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
15787   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
15788          "Invalid vector size");
15789   // Check if the new vector type is legal.
15790   if (!isTypeLegal(VecVT) ||
15791       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
15792        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
15793     return SDValue();
15794 
15795   // Make the new BUILD_VECTOR.
15796   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
15797 
15798   // The new BUILD_VECTOR node has the potential to be further optimized.
15799   AddToWorklist(BV.getNode());
15800   // Bitcast to the desired type.
15801   return DAG.getBitcast(VT, BV);
15802 }
15803 
15804 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
15805                                            ArrayRef<int> VectorMask,
15806                                            SDValue VecIn1, SDValue VecIn2,
15807                                            unsigned LeftIdx) {
15808   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15809   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
15810 
15811   EVT VT = N->getValueType(0);
15812   EVT InVT1 = VecIn1.getValueType();
15813   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
15814 
15815   unsigned Vec2Offset = 0;
15816   unsigned NumElems = VT.getVectorNumElements();
15817   unsigned ShuffleNumElems = NumElems;
15818 
15819   // In case both the input vectors are extracted from same base
15820   // vector we do not need extra addend (Vec2Offset) while
15821   // computing shuffle mask.
15822   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15823       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15824       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
15825     Vec2Offset = InVT1.getVectorNumElements();
15826 
15827   // We can't generate a shuffle node with mismatched input and output types.
15828   // Try to make the types match the type of the output.
15829   if (InVT1 != VT || InVT2 != VT) {
15830     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
15831       // If the output vector length is a multiple of both input lengths,
15832       // we can concatenate them and pad the rest with undefs.
15833       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
15834       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
15835       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
15836       ConcatOps[0] = VecIn1;
15837       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
15838       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15839       VecIn2 = SDValue();
15840     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
15841       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
15842         return SDValue();
15843 
15844       if (!VecIn2.getNode()) {
15845         // If we only have one input vector, and it's twice the size of the
15846         // output, split it in two.
15847         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
15848                              DAG.getConstant(NumElems, DL, IdxTy));
15849         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
15850         // Since we now have shorter input vectors, adjust the offset of the
15851         // second vector's start.
15852         Vec2Offset = NumElems;
15853       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
15854         // VecIn1 is wider than the output, and we have another, possibly
15855         // smaller input. Pad the smaller input with undefs, shuffle at the
15856         // input vector width, and extract the output.
15857         // The shuffle type is different than VT, so check legality again.
15858         if (LegalOperations &&
15859             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
15860           return SDValue();
15861 
15862         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
15863         // lower it back into a BUILD_VECTOR. So if the inserted type is
15864         // illegal, don't even try.
15865         if (InVT1 != InVT2) {
15866           if (!TLI.isTypeLegal(InVT2))
15867             return SDValue();
15868           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
15869                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
15870         }
15871         ShuffleNumElems = NumElems * 2;
15872       } else {
15873         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
15874         // than VecIn1. We can't handle this for now - this case will disappear
15875         // when we start sorting the vectors by type.
15876         return SDValue();
15877       }
15878     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
15879                InVT1.getSizeInBits() == VT.getSizeInBits()) {
15880       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
15881       ConcatOps[0] = VecIn2;
15882       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15883     } else {
15884       // TODO: Support cases where the length mismatch isn't exactly by a
15885       // factor of 2.
15886       // TODO: Move this check upwards, so that if we have bad type
15887       // mismatches, we don't create any DAG nodes.
15888       return SDValue();
15889     }
15890   }
15891 
15892   // Initialize mask to undef.
15893   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
15894 
15895   // Only need to run up to the number of elements actually used, not the
15896   // total number of elements in the shuffle - if we are shuffling a wider
15897   // vector, the high lanes should be set to undef.
15898   for (unsigned i = 0; i != NumElems; ++i) {
15899     if (VectorMask[i] <= 0)
15900       continue;
15901 
15902     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
15903     if (VectorMask[i] == (int)LeftIdx) {
15904       Mask[i] = ExtIndex;
15905     } else if (VectorMask[i] == (int)LeftIdx + 1) {
15906       Mask[i] = Vec2Offset + ExtIndex;
15907     }
15908   }
15909 
15910   // The type the input vectors may have changed above.
15911   InVT1 = VecIn1.getValueType();
15912 
15913   // If we already have a VecIn2, it should have the same type as VecIn1.
15914   // If we don't, get an undef/zero vector of the appropriate type.
15915   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
15916   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
15917 
15918   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
15919   if (ShuffleNumElems > NumElems)
15920     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
15921 
15922   return Shuffle;
15923 }
15924 
15925 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
15926 // operations. If the types of the vectors we're extracting from allow it,
15927 // turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is reserved for the zero vector, so real inputs start at index 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    // Collect the extraction index of every used element; the largest index
    // determines how wide the source actually needs to be.
    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    // Only split when the half-size (NearestPow2 / 2) is still strictly
    // wider than the output (NumElems * 2 < NearestPow2), i.e. the source
    // is much wider than the BUILD_VECTOR result.
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        // Replace the single wide input with its two halves.
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        // Redirect each used element to whichever half its index falls in.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // Real inputs start at VecIn[1]; VecIn[0] is the zero-vector marker.
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      // Zero-vector elements now come from the last entry in Shuffles.
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      // Pad an odd-sized level with an undef so every node has a partner.
      // Shuffles still has its original (larger) size, so this index is valid.
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
16133 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
16136 // TODO: Support sign extend or any extend?
16137 // TODO: Allow undef elements?
16138 // TODO: Don't require the extracts to start at element 0.
16139 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16140   if (LegalOperations)
16141     return SDValue();
16142 
16143   EVT VT = N->getValueType(0);
16144 
16145   SDValue Op0 = N->getOperand(0);
16146   auto checkElem = [&](SDValue Op) -> int64_t {
16147     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
16148         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16149         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
16150       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
16151         return C->getZExtValue();
16152     return -1;
16153   };
16154 
16155   // Make sure the first element matches
16156   // (zext (extract_vector_elt X, C))
16157   int64_t Offset = checkElem(Op0);
16158   if (Offset < 0)
16159     return SDValue();
16160 
16161   unsigned NumElems = N->getNumOperands();
16162   SDValue In = Op0.getOperand(0).getOperand(0);
16163   EVT InSVT = In.getValueType().getScalarType();
16164   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
16165 
16166   // Don't create an illegal input type after type legalization.
16167   if (LegalTypes && !TLI.isTypeLegal(InVT))
16168     return SDValue();
16169 
16170   // Ensure all the elements come from the same vector and are adjacent.
16171   for (unsigned i = 1; i != NumElems; ++i) {
16172     if ((Offset + i) != checkElem(N->getOperand(i)))
16173       return SDValue();
16174   }
16175 
16176   SDLoc DL(N);
16177   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
16178                    Op0.getOperand(0).getOperand(1));
16179   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
16180 }
16181 
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extraction index if Op is
    // (extract_vector_elt Src, C) with the same Src as operand 0;
    // otherwise returns -1.
    // NOTE(review): the lambda's uint64_t return type means "-1" is really
    // UINT64_MAX, which is narrowed back to -1 in 'int Offset' below. The
    // mixed-width comparisons in the loop happen to reject mismatches, but
    // int64_t (as used in convertBuildVecZextToZext) would be clearer —
    // confirm before changing.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // All operands must extract consecutive lanes starting at Offset.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Extracting all lanes of an identically-typed vector is a no-op.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  // Try the more specific combines first, falling through to the generic
  // shuffle-based lowering last.
  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
16255 
16256 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16257   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16258   EVT OpVT = N->getOperand(0).getValueType();
16259 
16260   // If the operands are legal vectors, leave them alone.
16261   if (TLI.isTypeLegal(OpVT))
16262     return SDValue();
16263 
16264   SDLoc DL(N);
16265   EVT VT = N->getValueType(0);
16266   SmallVector<SDValue, 8> Ops;
16267 
16268   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16269   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16270 
16271   // Keep track of what we encounter.
16272   bool AnyInteger = false;
16273   bool AnyFP = false;
16274   for (const SDValue &Op : N->ops()) {
16275     if (ISD::BITCAST == Op.getOpcode() &&
16276         !Op.getOperand(0).getValueType().isVector())
16277       Ops.push_back(Op.getOperand(0));
16278     else if (ISD::UNDEF == Op.getOpcode())
16279       Ops.push_back(ScalarUndef);
16280     else
16281       return SDValue();
16282 
16283     // Note whether we encounter an integer or floating point scalar.
16284     // If it's neither, bail out, it could be something weird like x86mmx.
16285     EVT LastOpVT = Ops.back().getValueType();
16286     if (LastOpVT.isFloatingPoint())
16287       AnyFP = true;
16288     else if (LastOpVT.isInteger())
16289       AnyInteger = true;
16290     else
16291       return SDValue();
16292   }
16293 
16294   // If any of the operands is a floating point scalar bitcast to a vector,
16295   // use floating point types throughout, and bitcast everything.
16296   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16297   if (AnyFP) {
16298     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16299     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16300     if (AnyInteger) {
16301       for (SDValue &Op : Ops) {
16302         if (Op.getValueType() == SVT)
16303           continue;
16304         if (Op.isUndef())
16305           Op = ScalarUndef;
16306         else
16307           Op = DAG.getBitcast(SVT, Op);
16308       }
16309     }
16310   }
16311 
16312   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16313                                VT.getSizeInBits() / SVT.getSizeInBits());
16314   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16315 }
16316 
16317 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16318 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16319 // most two distinct vectors the same size as the result, attempt to turn this
16320 // into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most two) distinct source vectors the final shuffle
  // may reference.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The extraction index must be constant to become a mask offset.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      // The bitcast changed the element count by a non-integer factor, so
      // the index cannot be expressed in result-element units.
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      // Elements from the second source are offset by NumElts in the mask.
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only lower to a shuffle the target can actually handle.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
16394 
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0).getValueType().isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // Only integer/FP scalars can be placed in a vector element.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand becomes NumElts undef scalars. Note this is a plain
      // 'if', not 'else if': an operand is either UNDEF or BUILD_VECTOR, so
      // exactly one of these two branches runs per operand.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i must read elements [i*PartNumElem, (i+1)*PartNumElem) from
    // the source for the whole concat to be a no-op.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
16552 
16553 /// If we are extracting a subvector produced by a wide binary operator try
16554 /// to use a narrow binary operator and/or avoid concatenation and extraction.
16555 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
16556   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
16557   // some of these bailouts with other transforms.
16558 
16559   // The extract index must be a constant, so we can map it to a concat operand.
16560   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16561   if (!ExtractIndexC)
16562     return SDValue();
16563 
16564   // We are looking for an optionally bitcasted wide vector binary operator
16565   // feeding an extract subvector.
16566   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
16567   if (!ISD::isBinaryOp(BinOp.getNode()))
16568     return SDValue();
16569 
16570   // The binop must be a vector type, so we can chop it in half.
16571   EVT WideBVT = BinOp.getValueType();
16572   if (!WideBVT.isVector())
16573     return SDValue();
16574 
16575   EVT VT = Extract->getValueType(0);
16576   unsigned NumElems = VT.getVectorNumElements();
16577   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
16578   assert(ExtractIndex % NumElems == 0 &&
16579          "Extract index is not a multiple of the vector length.");
16580   EVT SrcVT = Extract->getOperand(0).getValueType();
16581 
16582   // Bail out if this is not a proper multiple width extraction.
16583   unsigned NumSrcElems = SrcVT.getVectorNumElements();
16584   if (NumSrcElems % NumElems != 0)
16585     return SDValue();
16586 
16587   // Bail out if the target does not support a narrower version of the binop.
16588   unsigned NarrowingRatio = NumSrcElems / NumElems;
16589   unsigned BOpcode = BinOp.getOpcode();
16590   unsigned WideNumElts = WideBVT.getVectorNumElements();
16591   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
16592                                    WideNumElts / NarrowingRatio);
16593   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16594   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
16595     return SDValue();
16596 
16597   // If extraction is cheap, we don't need to look at the binop operands
16598   // for concat ops. The narrow binop alone makes this transform profitable.
16599   // We can't just reuse the original extract index operand because we may have
16600   // bitcasted.
16601   unsigned ConcatOpNum = ExtractIndex / NumElems;
16602   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
16603   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
16604   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
16605       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
16606     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
16607     SDLoc DL(Extract);
16608     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
16609     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16610                             BinOp.getOperand(0), NewExtIndex);
16611     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16612                             BinOp.getOperand(1), NewExtIndex);
16613     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
16614                                       BinOp.getNode()->getFlags());
16615     return DAG.getBitcast(VT, NarrowBinOp);
16616   }
16617 
16618   // Only handle the case where we are doubling and then halving. A larger ratio
16619   // may require more than two narrow binops to replace the wide binop.
16620   if (NarrowingRatio != 2)
16621     return SDValue();
16622 
16623   // TODO: The motivating case for this transform is an x86 AVX1 target. That
16624   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
16625   // flavors, but no other 256-bit integer support. This could be extended to
16626   // handle any binop, but that may require fixing/adding other folds to avoid
16627   // codegen regressions.
16628   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
16629     return SDValue();
16630 
16631   // We need at least one concatenation operation of a binop operand to make
16632   // this transform worthwhile. The concat must double the input vector sizes.
16633   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
16634   SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
16635   SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
16636   bool ConcatL =
16637       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
16638   bool ConcatR =
16639       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
16640   if (!ConcatL && !ConcatR)
16641     return SDValue();
16642 
16643   // If one of the binop operands was not the result of a concat, we must
16644   // extract a half-sized operand for our new narrow binop.
16645   SDLoc DL(Extract);
16646 
16647   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
16648   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
16649   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
16650   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
16651                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16652                                     BinOp.getOperand(0),
16653                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16654 
16655   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
16656                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16657                                     BinOp.getOperand(1),
16658                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16659 
16660   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
16661   return DAG.getBitcast(VT, NarrowBinOp);
16662 }
16663 
16664 /// If we are extracting a subvector from a wide vector load, convert to a
16665 /// narrow load to eliminate the extraction:
16666 /// (extract_subvector (load wide vector)) --> (load narrow vector)
16667 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
16668   // TODO: Add support for big-endian. The offset calculation must be adjusted.
16669   if (DAG.getDataLayout().isBigEndian())
16670     return SDValue();
16671 
16672   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
16673   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16674   if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
16675     return SDValue();
16676 
16677   // Allow targets to opt-out.
16678   EVT VT = Extract->getValueType(0);
16679   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16680   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
16681     return SDValue();
16682 
16683   // The narrow load will be offset from the base address of the old load if
16684   // we are extracting from something besides index 0 (little-endian).
16685   SDLoc DL(Extract);
16686   SDValue BaseAddr = Ld->getOperand(1);
16687   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
16688 
16689   // TODO: Use "BaseIndexOffset" to make this more effective.
16690   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
16691   MachineFunction &MF = DAG.getMachineFunction();
16692   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
16693                                                    VT.getStoreSize());
16694   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
16695   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
16696   return NewLd;
16697 }
16698 
16699 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
16700   EVT NVT = N->getValueType(0);
16701   SDValue V = N->getOperand(0);
16702 
16703   // Extract from UNDEF is UNDEF.
16704   if (V.isUndef())
16705     return DAG.getUNDEF(NVT);
16706 
16707   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
16708     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
16709       return NarrowLoad;
16710 
16711   // Combine:
16712   //    (extract_subvec (concat V1, V2, ...), i)
16713   // Into:
16714   //    Vi if possible
16715   // Only operand 0 is checked as 'concat' assumes all inputs of the same
16716   // type.
16717   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
16718       isa<ConstantSDNode>(N->getOperand(1)) &&
16719       V->getOperand(0).getValueType() == NVT) {
16720     unsigned Idx = N->getConstantOperandVal(1);
16721     unsigned NumElems = NVT.getVectorNumElements();
16722     assert((Idx % NumElems) == 0 &&
16723            "IDX in concat is not a multiple of the result vector length.");
16724     return V->getOperand(Idx / NumElems);
16725   }
16726 
16727   V = peekThroughBitcasts(V);
16728 
16729   // If the input is a build vector. Try to make a smaller build vector.
16730   if (V->getOpcode() == ISD::BUILD_VECTOR) {
16731     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
16732       EVT InVT = V->getValueType(0);
16733       unsigned ExtractSize = NVT.getSizeInBits();
16734       unsigned EltSize = InVT.getScalarSizeInBits();
16735       // Only do this if we won't split any elements.
16736       if (ExtractSize % EltSize == 0) {
16737         unsigned NumElems = ExtractSize / EltSize;
16738         EVT EltVT = InVT.getVectorElementType();
16739         EVT ExtractVT = NumElems == 1 ? EltVT :
16740           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
16741         if ((Level < AfterLegalizeDAG ||
16742              (NumElems == 1 ||
16743               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
16744             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
16745           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
16746                             EltSize;
16747           if (NumElems == 1) {
16748             SDValue Src = V->getOperand(IdxVal);
16749             if (EltVT != Src.getValueType())
16750               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
16751 
16752             return DAG.getBitcast(NVT, Src);
16753           }
16754 
16755           // Extract the pieces from the original build_vector.
16756           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
16757                                             makeArrayRef(V->op_begin() + IdxVal,
16758                                                          NumElems));
16759           return DAG.getBitcast(NVT, BuildVec);
16760         }
16761       }
16762     }
16763   }
16764 
16765   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
16766     // Handle only simple case where vector being inserted and vector
16767     // being extracted are of same size.
16768     EVT SmallVT = V->getOperand(1).getValueType();
16769     if (!NVT.bitsEq(SmallVT))
16770       return SDValue();
16771 
16772     // Only handle cases where both indexes are constants.
16773     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
16774     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
16775 
16776     if (InsIdx && ExtIdx) {
16777       // Combine:
16778       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
16779       // Into:
16780       //    indices are equal or bit offsets are equal => V1
16781       //    otherwise => (extract_subvec V1, ExtIdx)
16782       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
16783           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
16784         return DAG.getBitcast(NVT, V->getOperand(1));
16785       return DAG.getNode(
16786           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
16787           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
16788           N->getOperand(1));
16789     }
16790   }
16791 
16792   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
16793     return NarrowBOp;
16794 
16795   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16796     return SDValue(N, 0);
16797 
16798   return SDValue();
16799 }
16800 
16801 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
16802 // or turn a shuffle of a single concat into simpler shuffle then concat.
16803 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
16804   EVT VT = N->getValueType(0);
16805   unsigned NumElts = VT.getVectorNumElements();
16806 
16807   SDValue N0 = N->getOperand(0);
16808   SDValue N1 = N->getOperand(1);
16809   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
16810 
16811   SmallVector<SDValue, 4> Ops;
16812   EVT ConcatVT = N0.getOperand(0).getValueType();
16813   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
16814   unsigned NumConcats = NumElts / NumElemsPerConcat;
16815 
16816   // Special case: shuffle(concat(A,B)) can be more efficiently represented
16817   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
16818   // half vector elements.
16819   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
16820       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
16821                   SVN->getMask().end(), [](int i) { return i == -1; })) {
16822     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
16823                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
16824     N1 = DAG.getUNDEF(ConcatVT);
16825     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
16826   }
16827 
16828   // Look at every vector that's inserted. We're looking for exact
16829   // subvector-sized copies from a concatenated vector
16830   for (unsigned I = 0; I != NumConcats; ++I) {
16831     // Make sure we're dealing with a copy.
16832     unsigned Begin = I * NumElemsPerConcat;
16833     bool AllUndef = true, NoUndef = true;
16834     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
16835       if (SVN->getMaskElt(J) >= 0)
16836         AllUndef = false;
16837       else
16838         NoUndef = false;
16839     }
16840 
16841     if (NoUndef) {
16842       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
16843         return SDValue();
16844 
16845       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
16846         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
16847           return SDValue();
16848 
16849       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
16850       if (FirstElt < N0.getNumOperands())
16851         Ops.push_back(N0.getOperand(FirstElt));
16852       else
16853         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
16854 
16855     } else if (AllUndef) {
16856       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
16857     } else { // Mixed with general masks and undefs, can't do optimization.
16858       return SDValue();
16859     }
16860   }
16861 
16862   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16863 }
16864 
16865 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16866 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16867 //
16868 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
16869 // a simplification in some sense, but it isn't appropriate in general: some
16870 // BUILD_VECTORs are substantially cheaper than others. The general case
16871 // of a BUILD_VECTOR requires inserting each element individually (or
16872 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
16873 // all constants is a single constant pool load.  A BUILD_VECTOR where each
16874 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
16875 // are undef lowers to a small number of element insertions.
16876 //
16877 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
16878 // We don't fold shuffles where one side is a non-zero constant, and we don't
16879 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
16880 // non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Only combine when the shuffle is the sole user of its inputs; otherwise
  // the original vector stays live and we just add nodes.
  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Walk the mask and collect the scalar selected for each output lane.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    // Undef mask lanes produce undef scalars.
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Mask elements < NumElts select from N0; the rest select from N1.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        // Only lane 0 of a SCALAR_TO_VECTOR holds a known scalar.
        assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
        Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      // Prefer zero-extension where the target says it is free; otherwise
      // sign-extend. Either is acceptable here since BUILD_VECTOR only uses
      // the low element-sized bits of an over-wide integer operand.
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
16957 
16958 // Match shuffles that can be converted to any_vector_extend_in_reg.
16959 // This is often generated during legalization.
16960 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
16961 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
16962 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
16963                                             SelectionDAG &DAG,
16964                                             const TargetLowering &TLI,
16965                                             bool LegalOperations,
16966                                             bool LegalTypes) {
16967   EVT VT = SVN->getValueType(0);
16968   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
16969 
16970   // TODO Add support for big-endian when we have a test case.
16971   if (!VT.isInteger() || IsBigEndian)
16972     return SDValue();
16973 
16974   unsigned NumElts = VT.getVectorNumElements();
16975   unsigned EltSizeInBits = VT.getScalarSizeInBits();
16976   ArrayRef<int> Mask = SVN->getMask();
16977   SDValue N0 = SVN->getOperand(0);
16978 
16979   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
16980   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
16981     for (unsigned i = 0; i != NumElts; ++i) {
16982       if (Mask[i] < 0)
16983         continue;
16984       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
16985         continue;
16986       return false;
16987     }
16988     return true;
16989   };
16990 
16991   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
16992   // power-of-2 extensions as they are the most likely.
16993   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
16994     // Check for non power of 2 vector sizes
16995     if (NumElts % Scale != 0)
16996       continue;
16997     if (!isAnyExtend(Scale))
16998       continue;
16999 
17000     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17001     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17002     if (!LegalTypes || TLI.isTypeLegal(OutVT))
17003       if (!LegalOperations ||
17004           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17005         return DAG.getBitcast(VT,
17006                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
17007                                           SDLoc(SVN), OutVT, N0));
17008   }
17009 
17010   return SDValue();
17011 }
17012 
17013 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17014 // each source element of a large type into the lowest elements of a smaller
17015 // destination type. This is often generated during legalization.
17016 // If the source node itself was a '*_extend_vector_inreg' node then we should
17017 // then be able to remove it.
17018 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17019                                         SelectionDAG &DAG) {
17020   EVT VT = SVN->getValueType(0);
17021   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17022 
17023   // TODO Add support for big-endian when we have a test case.
17024   if (!VT.isInteger() || IsBigEndian)
17025     return SDValue();
17026 
17027   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17028 
17029   unsigned Opcode = N0.getOpcode();
17030   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17031       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17032       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17033     return SDValue();
17034 
17035   SDValue N00 = N0.getOperand(0);
17036   ArrayRef<int> Mask = SVN->getMask();
17037   unsigned NumElts = VT.getVectorNumElements();
17038   unsigned EltSizeInBits = VT.getScalarSizeInBits();
17039   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17040   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17041 
17042   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17043     return SDValue();
17044   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17045 
17046   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
17047   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17048   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
17049   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17050     for (unsigned i = 0; i != NumElts; ++i) {
17051       if (Mask[i] < 0)
17052         continue;
17053       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17054         continue;
17055       return false;
17056     }
17057     return true;
17058   };
17059 
17060   // At the moment we just handle the case where we've truncated back to the
17061   // same size as before the extension.
17062   // TODO: handle more extension/truncation cases as cases arise.
17063   if (EltSizeInBits != ExtSrcSizeInBits)
17064     return SDValue();
17065 
17066   // We can remove *extend_vector_inreg only if the truncation happens at
17067   // the same scale as the extension.
17068   if (isTruncate(ExtScale))
17069     return DAG.getBitcast(VT, N00);
17070 
17071   return SDValue();
17072 }
17073 
// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
                                     ShuffleVectorSDNode *Splat,
                                     SelectionDAG &DAG) {
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    // Reusing the splat is illegal exactly when some lane is defined in the
    // user mask and defined in the composed mask, but undef in the splat mask:
    // the reuse would expose a new undef lane to the shuffle's users.
    // NOTE(review): UserMask[i] is used to index SplatMask, which assumes all
    // defined user-mask elements refer to operand 0. The only caller passes a
    // shuffle whose second operand is undef with such references already
    // canonicalized away - confirm before reusing this helper elsewhere.
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
    return SDValue(Splat, 0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}
17122 
17123 /// If the shuffle mask is taking exactly one element from the first vector
17124 /// operand and passing through all other elements from the second vector
17125 /// operand, return the index of the mask element that is choosing an element
17126 /// from the first operand. Otherwise, return -1.
17127 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
17128   int MaskSize = Mask.size();
17129   int EltFromOp0 = -1;
17130   // TODO: This does not match if there are undef elements in the shuffle mask.
17131   // Should we ignore undefs in the shuffle mask instead? The trade-off is
17132   // removing an instruction (a shuffle), but losing the knowledge that some
17133   // vector lanes are not needed.
17134   for (int i = 0; i != MaskSize; ++i) {
17135     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
17136       // We're looking for a shuffle of exactly one element from operand 0.
17137       if (EltFromOp0 != -1)
17138         return -1;
17139       EltFromOp0 = i;
17140     } else if (Mask[i] != i + MaskSize) {
17141       // Nothing from operand 1 can change lanes.
17142       return -1;
17143     }
17144   }
17145   return EltFromOp0;
17146 }
17147 
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  // Mutable copy, needed only if we must test the commuted form below.
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask. From here on, Op0
    // is always the operand supplying the single element and Mask is the view
    // in which that holds.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insert index must be constant and equal to the Op0 lane the shuffle
  // reads, so the inserted scalar is exactly the element being moved.
  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
                                        Op0.getOperand(2).getValueType());
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}
17200 
17201 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
17202   EVT VT = N->getValueType(0);
17203   unsigned NumElts = VT.getVectorNumElements();
17204 
17205   SDValue N0 = N->getOperand(0);
17206   SDValue N1 = N->getOperand(1);
17207 
17208   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
17209 
17210   // Canonicalize shuffle undef, undef -> undef
17211   if (N0.isUndef() && N1.isUndef())
17212     return DAG.getUNDEF(VT);
17213 
17214   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17215 
17216   // Canonicalize shuffle v, v -> v, undef
17217   if (N0 == N1) {
17218     SmallVector<int, 8> NewMask;
17219     for (unsigned i = 0; i != NumElts; ++i) {
17220       int Idx = SVN->getMaskElt(i);
17221       if (Idx >= (int)NumElts) Idx -= NumElts;
17222       NewMask.push_back(Idx);
17223     }
17224     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
17225   }
17226 
17227   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
17228   if (N0.isUndef())
17229     return DAG.getCommutedVectorShuffle(*SVN);
17230 
17231   // Remove references to rhs if it is undef
17232   if (N1.isUndef()) {
17233     bool Changed = false;
17234     SmallVector<int, 8> NewMask;
17235     for (unsigned i = 0; i != NumElts; ++i) {
17236       int Idx = SVN->getMaskElt(i);
17237       if (Idx >= (int)NumElts) {
17238         Idx = -1;
17239         Changed = true;
17240       }
17241       NewMask.push_back(Idx);
17242     }
17243     if (Changed)
17244       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
17245   }
17246 
17247   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
17248     return InsElt;
17249 
17250   // A shuffle of a single vector that is a splat can always be folded.
17251   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
17252     if (N1->isUndef() && N0Shuf->isSplat())
17253       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
17254 
17255   // If it is a splat, check if the argument vector is another splat or a
17256   // build_vector.
17257   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
17258     SDNode *V = N0.getNode();
17259 
17260     // If this is a bit convert that changes the element type of the vector but
17261     // not the number of vector elements, look through it.  Be careful not to
17262     // look though conversions that change things like v4f32 to v2f64.
17263     if (V->getOpcode() == ISD::BITCAST) {
17264       SDValue ConvInput = V->getOperand(0);
17265       if (ConvInput.getValueType().isVector() &&
17266           ConvInput.getValueType().getVectorNumElements() == NumElts)
17267         V = ConvInput.getNode();
17268     }
17269 
17270     if (V->getOpcode() == ISD::BUILD_VECTOR) {
17271       assert(V->getNumOperands() == NumElts &&
17272              "BUILD_VECTOR has wrong number of operands");
17273       SDValue Base;
17274       bool AllSame = true;
17275       for (unsigned i = 0; i != NumElts; ++i) {
17276         if (!V->getOperand(i).isUndef()) {
17277           Base = V->getOperand(i);
17278           break;
17279         }
17280       }
17281       // Splat of <u, u, u, u>, return <u, u, u, u>
17282       if (!Base.getNode())
17283         return N0;
17284       for (unsigned i = 0; i != NumElts; ++i) {
17285         if (V->getOperand(i) != Base) {
17286           AllSame = false;
17287           break;
17288         }
17289       }
17290       // Splat of <x, x, x, x>, return <x, x, x, x>
17291       if (AllSame)
17292         return N0;
17293 
17294       // Canonicalize any other splat as a build_vector.
17295       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
17296       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
17297       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
17298 
17299       // We may have jumped through bitcasts, so the type of the
17300       // BUILD_VECTOR may not match the type of the shuffle.
17301       if (V->getValueType(0) != VT)
17302         NewBV = DAG.getBitcast(VT, NewBV);
17303       return NewBV;
17304     }
17305   }
17306 
17307   // Simplify source operands based on shuffle mask.
17308   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17309     return SDValue(N, 0);
17310 
17311   // Match shuffles that can be converted to any_vector_extend_in_reg.
17312   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
17313     return V;
17314 
17315   // Combine "truncate_vector_in_reg" style shuffles.
17316   if (SDValue V = combineTruncationShuffle(SVN, DAG))
17317     return V;
17318 
17319   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
17320       Level < AfterLegalizeVectorOps &&
17321       (N1.isUndef() ||
17322       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
17323        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
17324     if (SDValue V = partitionShuffleOfConcats(N, DAG))
17325       return V;
17326   }
17327 
17328   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17329   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17330   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
17331     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
17332       return Res;
17333 
17334   // If this shuffle only has a single input that is a bitcasted shuffle,
17335   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
17336   // back to their original types.
17337   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
17338       N1.isUndef() && Level < AfterLegalizeVectorOps &&
17339       TLI.isTypeLegal(VT)) {
17340     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
17341       if (Scale == 1)
17342         return SmallVector<int, 8>(Mask.begin(), Mask.end());
17343 
17344       SmallVector<int, 8> NewMask;
17345       for (int M : Mask)
17346         for (int s = 0; s != Scale; ++s)
17347           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
17348       return NewMask;
17349     };
17350 
17351     SDValue BC0 = peekThroughOneUseBitcasts(N0);
17352     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
17353       EVT SVT = VT.getScalarType();
17354       EVT InnerVT = BC0->getValueType(0);
17355       EVT InnerSVT = InnerVT.getScalarType();
17356 
17357       // Determine which shuffle works with the smaller scalar type.
17358       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
17359       EVT ScaleSVT = ScaleVT.getScalarType();
17360 
17361       if (TLI.isTypeLegal(ScaleVT) &&
17362           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
17363           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
17364         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17365         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17366 
17367         // Scale the shuffle masks to the smaller scalar type.
17368         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
17369         SmallVector<int, 8> InnerMask =
17370             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
17371         SmallVector<int, 8> OuterMask =
17372             ScaleShuffleMask(SVN->getMask(), OuterScale);
17373 
17374         // Merge the shuffle masks.
17375         SmallVector<int, 8> NewMask;
17376         for (int M : OuterMask)
17377           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
17378 
17379         // Test for shuffle mask legality over both commutations.
17380         SDValue SV0 = BC0->getOperand(0);
17381         SDValue SV1 = BC0->getOperand(1);
17382         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17383         if (!LegalMask) {
17384           std::swap(SV0, SV1);
17385           ShuffleVectorSDNode::commuteMask(NewMask);
17386           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17387         }
17388 
17389         if (LegalMask) {
17390           SV0 = DAG.getBitcast(ScaleVT, SV0);
17391           SV1 = DAG.getBitcast(ScaleVT, SV1);
17392           return DAG.getBitcast(
17393               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
17394         }
17395       }
17396     }
17397   }
17398 
17399   // Canonicalize shuffles according to rules:
17400   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
17401   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
17402   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
17403   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
17404       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
17405       TLI.isTypeLegal(VT)) {
17406     // The incoming shuffle must be of the same type as the result of the
17407     // current shuffle.
17408     assert(N1->getOperand(0).getValueType() == VT &&
17409            "Shuffle types don't match");
17410 
17411     SDValue SV0 = N1->getOperand(0);
17412     SDValue SV1 = N1->getOperand(1);
17413     bool HasSameOp0 = N0 == SV0;
17414     bool IsSV1Undef = SV1.isUndef();
17415     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
17416       // Commute the operands of this shuffle so that next rule
17417       // will trigger.
17418       return DAG.getCommutedVectorShuffle(*SVN);
17419   }
17420 
17421   // Try to fold according to rules:
17422   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17423   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17424   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17425   // Don't try to fold shuffles with illegal type.
17426   // Only fold if this shuffle is the only user of the other shuffle.
17427   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
17428       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
17429     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
17430 
17431     // Don't try to fold splats; they're likely to simplify somehow, or they
17432     // might be free.
17433     if (OtherSV->isSplat())
17434       return SDValue();
17435 
17436     // The incoming shuffle must be of the same type as the result of the
17437     // current shuffle.
17438     assert(OtherSV->getOperand(0).getValueType() == VT &&
17439            "Shuffle types don't match");
17440 
17441     SDValue SV0, SV1;
17442     SmallVector<int, 4> Mask;
17443     // Compute the combined shuffle mask for a shuffle with SV0 as the first
17444     // operand, and SV1 as the second operand.
17445     for (unsigned i = 0; i != NumElts; ++i) {
17446       int Idx = SVN->getMaskElt(i);
17447       if (Idx < 0) {
17448         // Propagate Undef.
17449         Mask.push_back(Idx);
17450         continue;
17451       }
17452 
17453       SDValue CurrentVec;
17454       if (Idx < (int)NumElts) {
17455         // This shuffle index refers to the inner shuffle N0. Lookup the inner
17456         // shuffle mask to identify which vector is actually referenced.
17457         Idx = OtherSV->getMaskElt(Idx);
17458         if (Idx < 0) {
17459           // Propagate Undef.
17460           Mask.push_back(Idx);
17461           continue;
17462         }
17463 
17464         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
17465                                            : OtherSV->getOperand(1);
17466       } else {
17467         // This shuffle index references an element within N1.
17468         CurrentVec = N1;
17469       }
17470 
17471       // Simple case where 'CurrentVec' is UNDEF.
17472       if (CurrentVec.isUndef()) {
17473         Mask.push_back(-1);
17474         continue;
17475       }
17476 
17477       // Canonicalize the shuffle index. We don't know yet if CurrentVec
17478       // will be the first or second operand of the combined shuffle.
17479       Idx = Idx % NumElts;
17480       if (!SV0.getNode() || SV0 == CurrentVec) {
17481         // Ok. CurrentVec is the left hand side.
17482         // Update the mask accordingly.
17483         SV0 = CurrentVec;
17484         Mask.push_back(Idx);
17485         continue;
17486       }
17487 
17488       // Bail out if we cannot convert the shuffle pair into a single shuffle.
17489       if (SV1.getNode() && SV1 != CurrentVec)
17490         return SDValue();
17491 
17492       // Ok. CurrentVec is the right hand side.
17493       // Update the mask accordingly.
17494       SV1 = CurrentVec;
17495       Mask.push_back(Idx + NumElts);
17496     }
17497 
17498     // Check if all indices in Mask are Undef. In case, propagate Undef.
17499     bool isUndefMask = true;
17500     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
17501       isUndefMask &= Mask[i] < 0;
17502 
17503     if (isUndefMask)
17504       return DAG.getUNDEF(VT);
17505 
17506     if (!SV0.getNode())
17507       SV0 = DAG.getUNDEF(VT);
17508     if (!SV1.getNode())
17509       SV1 = DAG.getUNDEF(VT);
17510 
17511     // Avoid introducing shuffles with illegal mask.
17512     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
17513       ShuffleVectorSDNode::commuteMask(Mask);
17514 
17515       if (!TLI.isShuffleMaskLegal(Mask, VT))
17516         return SDValue();
17517 
17518       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
17519       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
17520       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
17521       std::swap(SV0, SV1);
17522     }
17523 
17524     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17525     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17526     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17527     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
17528   }
17529 
17530   return SDValue();
17531 }
17532 
17533 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
17534   SDValue InVal = N->getOperand(0);
17535   EVT VT = N->getValueType(0);
17536 
17537   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
17538   // with a VECTOR_SHUFFLE and possible truncate.
17539   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17540     SDValue InVec = InVal->getOperand(0);
17541     SDValue EltNo = InVal->getOperand(1);
17542     auto InVecT = InVec.getValueType();
17543     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
17544       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
17545       int Elt = C0->getZExtValue();
17546       NewMask[0] = Elt;
17547       SDValue Val;
17548       // If we have an implict truncate do truncate here as long as it's legal.
17549       // if it's not legal, this should
17550       if (VT.getScalarType() != InVal.getValueType() &&
17551           InVal.getValueType().isScalarInteger() &&
17552           isTypeLegal(VT.getScalarType())) {
17553         Val =
17554             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
17555         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
17556       }
17557       if (VT.getScalarType() == InVecT.getScalarType() &&
17558           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
17559           TLI.isShuffleMaskLegal(NewMask, VT)) {
17560         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
17561                                    DAG.getUNDEF(InVecT), NewMask);
17562         // If the initial vector is the correct size this shuffle is a
17563         // valid result.
17564         if (VT == InVecT)
17565           return Val;
17566         // If not we must truncate the vector.
17567         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
17568           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17569           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
17570           EVT SubVT =
17571               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
17572                                VT.getVectorNumElements());
17573           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
17574                             ZeroIdx);
17575           return Val;
17576         }
17577       }
17578     }
17579   }
17580 
17581   return SDValue();
17582 }
17583 
17584 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
17585   EVT VT = N->getValueType(0);
17586   SDValue N0 = N->getOperand(0);
17587   SDValue N1 = N->getOperand(1);
17588   SDValue N2 = N->getOperand(2);
17589 
17590   // If inserting an UNDEF, just return the original vector.
17591   if (N1.isUndef())
17592     return N0;
17593 
17594   // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
17595   // us to pull BITCASTs from input to output.
17596   if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
17597     if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
17598       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
17599 
17600   // If this is an insert of an extracted vector into an undef vector, we can
17601   // just use the input to the extract.
17602   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17603       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
17604     return N1.getOperand(0);
17605 
17606   // If we are inserting a bitcast value into an undef, with the same
17607   // number of elements, just use the bitcast input of the extract.
17608   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
17609   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
17610   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
17611       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17612       N1.getOperand(0).getOperand(1) == N2 &&
17613       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
17614           VT.getVectorNumElements() &&
17615       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
17616           VT.getSizeInBits()) {
17617     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
17618   }
17619 
17620   // If both N1 and N2 are bitcast values on which insert_subvector
17621   // would makes sense, pull the bitcast through.
17622   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
17623   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
17624   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
17625     SDValue CN0 = N0.getOperand(0);
17626     SDValue CN1 = N1.getOperand(0);
17627     EVT CN0VT = CN0.getValueType();
17628     EVT CN1VT = CN1.getValueType();
17629     if (CN0VT.isVector() && CN1VT.isVector() &&
17630         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
17631         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
17632       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
17633                                       CN0.getValueType(), CN0, CN1, N2);
17634       return DAG.getBitcast(VT, NewINSERT);
17635     }
17636   }
17637 
17638   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
17639   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
17640   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
17641   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
17642       N0.getOperand(1).getValueType() == N1.getValueType() &&
17643       N0.getOperand(2) == N2)
17644     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
17645                        N1, N2);
17646 
17647   if (!isa<ConstantSDNode>(N2))
17648     return SDValue();
17649 
17650   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
17651 
17652   // Canonicalize insert_subvector dag nodes.
17653   // Example:
17654   // (insert_subvector (insert_subvector A, Idx0), Idx1)
17655   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
17656   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
17657       N1.getValueType() == N0.getOperand(1).getValueType() &&
17658       isa<ConstantSDNode>(N0.getOperand(2))) {
17659     unsigned OtherIdx = N0.getConstantOperandVal(2);
17660     if (InsIdx < OtherIdx) {
17661       // Swap nodes.
17662       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
17663                                   N0.getOperand(0), N1, N2);
17664       AddToWorklist(NewOp.getNode());
17665       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
17666                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
17667     }
17668   }
17669 
17670   // If the input vector is a concatenation, and the insert replaces
17671   // one of the pieces, we can optimize into a single concat_vectors.
17672   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
17673       N0.getOperand(0).getValueType() == N1.getValueType()) {
17674     unsigned Factor = N1.getValueType().getVectorNumElements();
17675 
17676     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
17677     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
17678 
17679     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17680   }
17681 
17682   // Simplify source operands based on insertion.
17683   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17684     return SDValue(N, 0);
17685 
17686   return SDValue();
17687 }
17688 
17689 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
17690   SDValue N0 = N->getOperand(0);
17691 
17692   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
17693   if (N0->getOpcode() == ISD::FP16_TO_FP)
17694     return N0->getOperand(0);
17695 
17696   return SDValue();
17697 }
17698 
17699 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
17700   SDValue N0 = N->getOperand(0);
17701 
17702   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
17703   if (N0->getOpcode() == ISD::AND) {
17704     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
17705     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
17706       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
17707                          N0.getOperand(0));
17708     }
17709   }
17710 
17711   return SDValue();
17712 }
17713 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  // Look through bitcasts on the mask operand so we can see the raw
  // BUILD_VECTOR constants.
  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  // The mask must be a constant BUILD_VECTOR for per-element inspection.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // `Split` is how many sub-elements each original element is divided into;
  // returns an empty SDValue if any sub-element is a mixed bit pattern.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    // Shuffle indices: i selects sub-element i of LHS (keep), i + NumSubElts
    // selects from the all-zero second operand (clear), -1 is undef.
    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      // The mask element must be an integer or FP constant; anything else
      // defeats the transform.
      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // Big-endian lays sub-elements out from the high bits downward.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // Each sub-element must be entirely ones (pass through) or entirely
      // zeros (clear) for a shuffle to model the AND.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    // shuffle(bitcast(LHS), 0, Indices), bitcast back to the original type.
    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try the coarsest split first; finer splits handle mixed per-byte masks.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
17804 
17805 /// Visit a binary vector operation, like ADD.
17806 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
17807   assert(N->getValueType(0).isVector() &&
17808          "SimplifyVBinOp only works on vectors!");
17809 
17810   SDValue LHS = N->getOperand(0);
17811   SDValue RHS = N->getOperand(1);
17812   SDValue Ops[] = {LHS, RHS};
17813 
17814   // See if we can constant fold the vector operation.
17815   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
17816           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
17817     return Fold;
17818 
17819   // Type legalization might introduce new shuffles in the DAG.
17820   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
17821   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
17822   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
17823       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
17824       LHS.getOperand(1).isUndef() &&
17825       RHS.getOperand(1).isUndef()) {
17826     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
17827     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
17828 
17829     if (SVN0->getMask().equals(SVN1->getMask())) {
17830       EVT VT = N->getValueType(0);
17831       SDValue UndefVector = LHS.getOperand(1);
17832       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
17833                                      LHS.getOperand(0), RHS.getOperand(0),
17834                                      N->getFlags());
17835       AddUsersToWorklist(N);
17836       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
17837                                   SVN0->getMask());
17838     }
17839   }
17840 
17841   return SDValue();
17842 }
17843 
17844 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
17845                                     SDValue N2) {
17846   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
17847 
17848   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
17849                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
17850 
17851   // If we got a simplified select_cc node back from SimplifySelectCC, then
17852   // break it down into a new SETCC node, and a new SELECT node, and then return
17853   // the SELECT node, since we were called with a SELECT node.
17854   if (SCC.getNode()) {
17855     // Check to see if we got a select_cc back (to turn into setcc/select).
17856     // Otherwise, just return whatever node we got back, like fabs.
17857     if (SCC.getOpcode() == ISD::SELECT_CC) {
17858       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
17859                                   N0.getValueType(),
17860                                   SCC.getOperand(0), SCC.getOperand(1),
17861                                   SCC.getOperand(4));
17862       AddToWorklist(SETCC.getNode());
17863       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
17864                            SCC.getOperand(2), SCC.getOperand(3));
17865     }
17866 
17867     return SCC;
17868   }
17869   return SDValue();
17870 }
17871 
17872 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
17873 /// being selected between, see if we can simplify the select.  Callers of this
17874 /// should assume that TheSelect is deleted if this returns true.  As such, they
17875 /// should return the appropriate thing (e.g. the node) back to the top-level of
17876 /// the DAG combiner loop to avoid it being looked at.
17877 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
17878                                     SDValue RHS) {
17879   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17880   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
17881   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
17882     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
17883       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
17884       SDValue Sqrt = RHS;
17885       ISD::CondCode CC;
17886       SDValue CmpLHS;
17887       const ConstantFPSDNode *Zero = nullptr;
17888 
17889       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
17890         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
17891         CmpLHS = TheSelect->getOperand(0);
17892         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
17893       } else {
17894         // SELECT or VSELECT
17895         SDValue Cmp = TheSelect->getOperand(0);
17896         if (Cmp.getOpcode() == ISD::SETCC) {
17897           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
17898           CmpLHS = Cmp.getOperand(0);
17899           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
17900         }
17901       }
17902       if (Zero && Zero->isZero() &&
17903           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
17904           CC == ISD::SETULT || CC == ISD::SETLT)) {
17905         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17906         CombineTo(TheSelect, Sqrt);
17907         return true;
17908       }
17909     }
17910   }
17911   // Cannot simplify select with vector condition
17912   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
17913 
17914   // If this is a select from two identical things, try to pull the operation
17915   // through the select.
17916   if (LHS.getOpcode() != RHS.getOpcode() ||
17917       !LHS.hasOneUse() || !RHS.hasOneUse())
17918     return false;
17919 
17920   // If this is a load and the token chain is identical, replace the select
17921   // of two loads with a load through a select of the address to load from.
17922   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
17923   // constants have been dropped into the constant pool.
17924   if (LHS.getOpcode() == ISD::LOAD) {
17925     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
17926     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
17927 
17928     // Token chains must be identical.
17929     if (LHS.getOperand(0) != RHS.getOperand(0) ||
17930         // Do not let this transformation reduce the number of volatile loads.
17931         LLD->isVolatile() || RLD->isVolatile() ||
17932         // FIXME: If either is a pre/post inc/dec load,
17933         // we'd need to split out the address adjustment.
17934         LLD->isIndexed() || RLD->isIndexed() ||
17935         // If this is an EXTLOAD, the VT's must match.
17936         LLD->getMemoryVT() != RLD->getMemoryVT() ||
17937         // If this is an EXTLOAD, the kind of extension must match.
17938         (LLD->getExtensionType() != RLD->getExtensionType() &&
17939          // The only exception is if one of the extensions is anyext.
17940          LLD->getExtensionType() != ISD::EXTLOAD &&
17941          RLD->getExtensionType() != ISD::EXTLOAD) ||
17942         // FIXME: this discards src value information.  This is
17943         // over-conservative. It would be beneficial to be able to remember
17944         // both potential memory locations.  Since we are discarding
17945         // src value info, don't do the transformation if the memory
17946         // locations are not in the default address space.
17947         LLD->getPointerInfo().getAddrSpace() != 0 ||
17948         RLD->getPointerInfo().getAddrSpace() != 0 ||
17949         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
17950                                       LLD->getBasePtr().getValueType()))
17951       return false;
17952 
17953     // The loads must not depend on one another.
17954     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
17955       return false;
17956 
17957     // Check that the select condition doesn't reach either load.  If so,
17958     // folding this will induce a cycle into the DAG.  If not, this is safe to
17959     // xform, so create a select of the addresses.
17960 
17961     SmallPtrSet<const SDNode *, 32> Visited;
17962     SmallVector<const SDNode *, 16> Worklist;
17963 
17964     // Always fail if LLD and RLD are not independent. TheSelect is a
17965     // predecessor to all Nodes in question so we need not search past it.
17966 
17967     Visited.insert(TheSelect);
17968     Worklist.push_back(LLD);
17969     Worklist.push_back(RLD);
17970 
17971     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
17972         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
17973       return false;
17974 
17975     SDValue Addr;
17976     if (TheSelect->getOpcode() == ISD::SELECT) {
17977       // We cannot do this optimization if any pair of {RLD, LLD} is a
17978       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
17979       // Loads, we only need to check if CondNode is a successor to one of the
17980       // loads. We can further avoid this if there's no use of their chain
17981       // value.
17982       SDNode *CondNode = TheSelect->getOperand(0).getNode();
17983       Worklist.push_back(CondNode);
17984 
17985       if ((LLD->hasAnyUseOfValue(1) &&
17986            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
17987           (RLD->hasAnyUseOfValue(1) &&
17988            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
17989         return false;
17990 
17991       Addr = DAG.getSelect(SDLoc(TheSelect),
17992                            LLD->getBasePtr().getValueType(),
17993                            TheSelect->getOperand(0), LLD->getBasePtr(),
17994                            RLD->getBasePtr());
17995     } else {  // Otherwise SELECT_CC
17996       // We cannot do this optimization if any pair of {RLD, LLD} is a
17997       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
17998       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
17999       // one of the loads. We can further avoid this if there's no use of their
18000       // chain value.
18001 
18002       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
18003       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
18004       Worklist.push_back(CondLHS);
18005       Worklist.push_back(CondRHS);
18006 
18007       if ((LLD->hasAnyUseOfValue(1) &&
18008            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18009           (RLD->hasAnyUseOfValue(1) &&
18010            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18011         return false;
18012 
18013       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
18014                          LLD->getBasePtr().getValueType(),
18015                          TheSelect->getOperand(0),
18016                          TheSelect->getOperand(1),
18017                          LLD->getBasePtr(), RLD->getBasePtr(),
18018                          TheSelect->getOperand(4));
18019     }
18020 
18021     SDValue Load;
18022     // It is safe to replace the two loads if they have different alignments,
18023     // but the new load must be the minimum (most restrictive) alignment of the
18024     // inputs.
18025     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
18026     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
18027     if (!RLD->isInvariant())
18028       MMOFlags &= ~MachineMemOperand::MOInvariant;
18029     if (!RLD->isDereferenceable())
18030       MMOFlags &= ~MachineMemOperand::MODereferenceable;
18031     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
18032       // FIXME: Discards pointer and AA info.
18033       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
18034                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
18035                          MMOFlags);
18036     } else {
18037       // FIXME: Discards pointer and AA info.
18038       Load = DAG.getExtLoad(
18039           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
18040                                                   : LLD->getExtensionType(),
18041           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
18042           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
18043     }
18044 
18045     // Users of the select now use the result of the load.
18046     CombineTo(TheSelect, Load);
18047 
18048     // Users of the old loads now use the new load's chain.  We know the
18049     // old-load value is dead now.
18050     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
18051     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
18052     return true;
18053   }
18054 
18055   return false;
18056 }
18057 
18058 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
18059 /// bitwise 'and'.
18060 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
18061                                             SDValue N1, SDValue N2, SDValue N3,
18062                                             ISD::CondCode CC) {
18063   // If this is a select where the false operand is zero and the compare is a
18064   // check of the sign bit, see if we can perform the "gzip trick":
18065   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
18066   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
18067   EVT XType = N0.getValueType();
18068   EVT AType = N2.getValueType();
18069   if (!isNullConstant(N3) || !XType.bitsGE(AType))
18070     return SDValue();
18071 
18072   // If the comparison is testing for a positive value, we have to invert
18073   // the sign bit mask, so only do that transform if the target has a bitwise
18074   // 'and not' instruction (the invert is free).
18075   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
18076     // (X > -1) ? A : 0
18077     // (X >  0) ? X : 0 <-- This is canonical signed max.
18078     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
18079       return SDValue();
18080   } else if (CC == ISD::SETLT) {
18081     // (X <  0) ? A : 0
18082     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
18083     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
18084       return SDValue();
18085   } else {
18086     return SDValue();
18087   }
18088 
18089   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
18090   // constant.
18091   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
18092   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18093   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
18094     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
18095     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
18096     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
18097     AddToWorklist(Shift.getNode());
18098 
18099     if (XType.bitsGT(AType)) {
18100       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18101       AddToWorklist(Shift.getNode());
18102     }
18103 
18104     if (CC == ISD::SETGT)
18105       Shift = DAG.getNOT(DL, Shift, AType);
18106 
18107     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18108   }
18109 
18110   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
18111   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
18112   AddToWorklist(Shift.getNode());
18113 
18114   if (XType.bitsGT(AType)) {
18115     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18116     AddToWorklist(Shift.getNode());
18117   }
18118 
18119   if (CC == ISD::SETGT)
18120     Shift = DAG.getNOT(DL, Shift, AType);
18121 
18122   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18123 }
18124 
18125 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
18126 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
18127 /// in it. This may be a win when the constant is not otherwise available
18128 /// because it replaces two constant pool loads with one.
18129 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
18130     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
18131     ISD::CondCode CC) {
18132   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
18133     return SDValue();
18134 
18135   // If we are before legalize types, we want the other legalization to happen
18136   // first (for example, to avoid messing with soft float).
18137   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
18138   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
18139   EVT VT = N2.getValueType();
18140   if (!TV || !FV || !TLI.isTypeLegal(VT))
18141     return SDValue();
18142 
18143   // If a constant can be materialized without loads, this does not make sense.
18144   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
18145       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
18146       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
18147     return SDValue();
18148 
18149   // If both constants have multiple uses, then we won't need to do an extra
18150   // load. The values are likely around in registers for other users.
18151   if (!TV->hasOneUse() && !FV->hasOneUse())
18152     return SDValue();
18153 
18154   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
18155                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
18156   Type *FPTy = Elts[0]->getType();
18157   const DataLayout &TD = DAG.getDataLayout();
18158 
18159   // Create a ConstantArray of the two constants.
18160   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
18161   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
18162                                       TD.getPrefTypeAlignment(FPTy));
18163   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
18164 
18165   // Get offsets to the 0 and 1 elements of the array, so we can select between
18166   // them.
18167   SDValue Zero = DAG.getIntPtrConstant(0, DL);
18168   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
18169   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
18170   SDValue Cond =
18171       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
18172   AddToWorklist(Cond.getNode());
18173   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
18174   AddToWorklist(CstOffset.getNode());
18175   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
18176   AddToWorklist(CPIdx.getNode());
18177   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
18178                      MachinePointerInfo::getConstantPool(
18179                          DAG.getMachineFunction()), Alignment);
18180 }
18181 
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
///
/// \p NotExtCompare, when true, tells us the caller does not want the result
/// expressed as a zero-extended setcc (the "select C, 1, 0" shape below).
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT CmpOpVT = N0.getValueType();
  EVT VT = N2.getValueType();
  // Constant forms of the operands, if any; several folds below need them.
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
                              false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // 'Fold' matches the (power-of-2, 0) arm order; 'Swap' matches
  // (0, power-of-2) and is handled by inverting the condition first.
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

    if (Swap) {
      CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
      // After type legalization the setcc result type may be wider than VT:
      // clear the high bits in-register instead of zero-extending.
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    if (N2C->isOne())
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    // The negated arm must literally be (sub 0, X), i.e. SubC is zero.
    if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
                                  DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
                                                  DL,
                                                  getShiftAmountTy(CmpOpVT)));
      SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
18355 
18356 /// This is a stub for TargetLowering::SimplifySetCC.
18357 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18358                                    ISD::CondCode Cond, const SDLoc &DL,
18359                                    bool foldBooleans) {
18360   TargetLowering::DAGCombinerInfo
18361     DagCombineInfo(DAG, Level, false, this);
18362   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18363 }
18364 
18365 /// Given an ISD::SDIV node expressing a divide by constant, return
18366 /// a DAG expression to select that will generate the same value by multiplying
18367 /// by a magic number.
18368 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18369 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18370   // when optimising for minimum size, we don't want to expand a div to a mul
18371   // and a shift.
18372   if (DAG.getMachineFunction().getFunction().optForMinSize())
18373     return SDValue();
18374 
18375   SmallVector<SDNode *, 8> Built;
18376   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18377     for (SDNode *N : Built)
18378       AddToWorklist(N);
18379     return S;
18380   }
18381 
18382   return SDValue();
18383 }
18384 
18385 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
18386 /// DAG expression that will generate the same value by right shifting.
18387 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
18388   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18389   if (!C)
18390     return SDValue();
18391 
18392   // Avoid division by zero.
18393   if (C->isNullValue())
18394     return SDValue();
18395 
18396   SmallVector<SDNode *, 8> Built;
18397   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
18398     for (SDNode *N : Built)
18399       AddToWorklist(N);
18400     return S;
18401   }
18402 
18403   return SDValue();
18404 }
18405 
18406 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
18407 /// expression that will generate the same value by multiplying by a magic
18408 /// number.
18409 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18410 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
18411   // when optimising for minimum size, we don't want to expand a div to a mul
18412   // and a shift.
18413   if (DAG.getMachineFunction().getFunction().optForMinSize())
18414     return SDValue();
18415 
18416   SmallVector<SDNode *, 8> Built;
18417   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
18418     for (SDNode *N : Built)
18419       AddToWorklist(N);
18420     return S;
18421   }
18422 
18423   return SDValue();
18424 }
18425 
18426 /// Determines the LogBase2 value for a non-null input value using the
18427 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
18428 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
18429   EVT VT = V.getValueType();
18430   unsigned EltBits = VT.getScalarSizeInBits();
18431   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
18432   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
18433   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
18434   return LogBase2;
18435 }
18436 
18437 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18438 /// For the reciprocal, we need to find the zero of the function:
18439 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
18440 ///     =>
18441 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
18442 ///     does not require additional intermediate precision]
18443 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
18444   if (Level >= AfterLegalizeDAG)
18445     return SDValue();
18446 
18447   // TODO: Handle half and/or extended types?
18448   EVT VT = Op.getValueType();
18449   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18450     return SDValue();
18451 
18452   // If estimates are explicitly disabled for this function, we're done.
18453   MachineFunction &MF = DAG.getMachineFunction();
18454   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
18455   if (Enabled == TLI.ReciprocalEstimate::Disabled)
18456     return SDValue();
18457 
18458   // Estimates may be explicitly enabled for this type with a custom number of
18459   // refinement steps.
18460   int Iterations = TLI.getDivRefinementSteps(VT, MF);
18461   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
18462     AddToWorklist(Est.getNode());
18463 
18464     if (Iterations) {
18465       EVT VT = Op.getValueType();
18466       SDLoc DL(Op);
18467       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18468 
18469       // Newton iterations: Est = Est + Est (1 - Arg * Est)
18470       for (int i = 0; i < Iterations; ++i) {
18471         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
18472         AddToWorklist(NewEst.getNode());
18473 
18474         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
18475         AddToWorklist(NewEst.getNode());
18476 
18477         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18478         AddToWorklist(NewEst.getNode());
18479 
18480         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
18481         AddToWorklist(Est.getNode());
18482       }
18483     }
18484     return Est;
18485   }
18486 
18487   return SDValue();
18488 }
18489 
18490 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18491 /// For the reciprocal sqrt, we need to find the zero of the function:
18492 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18493 ///     =>
18494 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
18495 /// As a result, we precompute A/2 prior to the iteration loop.
18496 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
18497                                          unsigned Iterations,
18498                                          SDNodeFlags Flags, bool Reciprocal) {
18499   EVT VT = Arg.getValueType();
18500   SDLoc DL(Arg);
18501   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
18502 
18503   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
18504   // this entire sequence requires only one FP constant.
18505   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
18506   AddToWorklist(HalfArg.getNode());
18507 
18508   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
18509   AddToWorklist(HalfArg.getNode());
18510 
18511   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
18512   for (unsigned i = 0; i < Iterations; ++i) {
18513     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
18514     AddToWorklist(NewEst.getNode());
18515 
18516     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
18517     AddToWorklist(NewEst.getNode());
18518 
18519     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
18520     AddToWorklist(NewEst.getNode());
18521 
18522     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18523     AddToWorklist(Est.getNode());
18524   }
18525 
18526   // If non-reciprocal square root is requested, multiply the result by Arg.
18527   if (!Reciprocal) {
18528     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
18529     AddToWorklist(Est.getNode());
18530   }
18531 
18532   return Est;
18533 }
18534 
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
///
/// Unlike the one-constant variant, when a plain sqrt is requested
/// (Reciprocal == false) the final multiply by A is folded into the last loop
/// iteration, which is why at least one iteration is required in that case.
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  // The two FP constants used by every iteration.
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
18583 
18584 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
18585 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
18586 /// Op can be zero.
18587 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
18588                                            bool Reciprocal) {
18589   if (Level >= AfterLegalizeDAG)
18590     return SDValue();
18591 
18592   // TODO: Handle half and/or extended types?
18593   EVT VT = Op.getValueType();
18594   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18595     return SDValue();
18596 
18597   // If estimates are explicitly disabled for this function, we're done.
18598   MachineFunction &MF = DAG.getMachineFunction();
18599   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
18600   if (Enabled == TLI.ReciprocalEstimate::Disabled)
18601     return SDValue();
18602 
18603   // Estimates may be explicitly enabled for this type with a custom number of
18604   // refinement steps.
18605   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
18606 
18607   bool UseOneConstNR = false;
18608   if (SDValue Est =
18609       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
18610                           Reciprocal)) {
18611     AddToWorklist(Est.getNode());
18612 
18613     if (Iterations) {
18614       Est = UseOneConstNR
18615             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
18616             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
18617 
18618       if (!Reciprocal) {
18619         // The estimate is now completely wrong if the input was exactly 0.0 or
18620         // possibly a denormal. Force the answer to 0.0 for those cases.
18621         EVT VT = Op.getValueType();
18622         SDLoc DL(Op);
18623         EVT CCVT = getSetCCResultType(VT);
18624         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
18625         const Function &F = DAG.getMachineFunction().getFunction();
18626         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
18627         if (Denorms.getValueAsString().equals("ieee")) {
18628           // fabs(X) < SmallestNormal ? 0.0 : Est
18629           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
18630           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
18631           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
18632           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18633           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
18634           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
18635           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
18636           AddToWorklist(Fabs.getNode());
18637           AddToWorklist(IsDenorm.getNode());
18638           AddToWorklist(Est.getNode());
18639         } else {
18640           // X == 0.0 ? 0.0 : Est
18641           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18642           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
18643           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
18644           AddToWorklist(IsZero.getNode());
18645           AddToWorklist(Est.getNode());
18646         }
18647       }
18648     }
18649     return Est;
18650   }
18651 
18652   return SDValue();
18653 }
18654 
/// Build an estimate of 1/sqrt(Op); thin wrapper over buildSqrtEstimateImpl.
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/true);
}
18658 
/// Build an estimate of sqrt(Op); thin wrapper over buildSqrtEstimateImpl.
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/false);
}
18662 
/// Return true if there is any possibility that the two addresses overlap.
/// Conservative: answers true unless one of the disjointness proofs below
/// (base+offset arithmetic, distinct base kinds, alignment, or AA) succeeds.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Sizes of the two accesses in bytes.
  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
  int64_t PtrDiff;
  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
    // Same base and index: Op1 starts PtrDiff bytes after Op0, so the
    // accesses alias exactly when the byte ranges [0, NumBytes0) and
    // [PtrDiff, PtrDiff + NumBytes1) intersect.
    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
    // able to calculate their relative offset if at least one arises
    // from an alloca. However, these allocas cannot overlap and we
    // can infer there is no alias.
    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
        // If the bases are the same frame index but we couldn't find a
        // constant offset (the indices are different), be conservative.
        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                       !MFI.isFixedObjectIndex(B->getIndex())))
          return false;
      }

    // Classify each base as frame index, global address, or constant pool.
    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());

    // If of mismatched base types or checkable indices we can check
    // they do not alias.
    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
      return false;
  }

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    // Positions of each access within its alignment block.
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  // Decide whether to consult AliasAnalysis: an explicit command-line
  // occurrence of CombinerGlobalAA overrides the subtarget's useAA() choice.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    // Extents are measured from the smaller of the two source offsets so
    // both locations span their access relative to a common start.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
18767 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
/// On success, Aliases holds the chains N must depend on; if the depth limit
/// is hit, Aliases conservatively contains just OriginalChain.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // A volatile load is not treated as a plain load here: it must not be
  // reordered with other memory operations.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Give up: conservatively make N depend on its original chain.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been here before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // Two non-volatile loads never need ordering between each other.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
18854 
18855 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
18856 /// (aliasing node.)
18857 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
18858   if (OptLevel == CodeGenOpt::None)
18859     return OldChain;
18860 
18861   // Ops for replacing token factor.
18862   SmallVector<SDValue, 8> Aliases;
18863 
18864   // Accumulate all the aliases to this node.
18865   GatherAllAliases(N, OldChain, Aliases);
18866 
18867   // If no operands then chain to entry token.
18868   if (Aliases.size() == 0)
18869     return DAG.getEntryNode();
18870 
18871   // If a single operand then chain to it.  We don't need to revisit it.
18872   if (Aliases.size() == 1)
18873     return Aliases[0];
18874 
18875   // Construct a custom tailored token factor.
18876   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
18877 }
18878 
// TODO: Replace with std::monostate when we move to C++17.
// Placed in an anonymous namespace: these are file-local helpers, and giving
// the global `Unit` and its operators external linkage risks ODR collisions.
namespace {
// Empty unit type used as the (unused) mapped value in the IntervalMap below.
struct UnitT { } Unit;
// All UnitT values are identical, so equality is trivially true.
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // end anonymous namespace
18883 
18884 // This function tries to collect a bunch of potentially interesting
18885 // nodes to improve the chains of, all at once. This might seem
18886 // redundant, as this function gets called when visiting every store
18887 // node, so why not let the work be done on each store as it's visited?
18888 //
18889 // I believe this is mainly important because MergeConsecutiveStores
18890 // is unable to deal with merging stores of different sizes, so unless
18891 // we improve the chains of all the potential candidates up-front
18892 // before running MergeConsecutiveStores, it might only see some of
18893 // the nodes that will eventually be candidates, and then not be able
18894 // to go from a partially-merged state to the desired final
18895 // fully-merged state.
18896 
/// Rewrite a run of chained, provably disjoint stores ending at St so that
/// each store depends on one common chain (via a TokenFactor) instead of on
/// the previous store. Returns true if the DAG was changed.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the immediately previous address
  // space and thus merged with the previous interval at insertion time.

  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Add ST's interval.
  // Size is converted from bits to bytes, rounding up.
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  // Walk up the chain collecting stores whose byte ranges (relative to St's
  // base pointer) are disjoint from everything recorded so far.
  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    if (Chain->isVolatile() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    // NOTE(review): if find() returns the first interval with stop > Offset
    // (half-open semantics), the preceding interval always has stop <= Offset,
    // so this condition bails out whenever any recorded interval lies entirely
    // at or below Offset — conservative; confirm the intended overlap test
    // before changing it.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  // Iterate in reverse so the oldest store is processed first, keeping the
  // TokenFactor operands in original program order.
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    // Rebuild the store with the improved chain; all other operands unchanged.
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
  CombineTo(St, TF);

  AddToWorklist(STChain);
  // Add TF operands worklist in reverse order.
  for (auto I = TF->getNumOperands(); I;)
    AddToWorklist(TF->getOperand(--I).getNode());
  AddToWorklist(TF.getNode());
  return true;
}
19003 
19004 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
19005   if (OptLevel == CodeGenOpt::None)
19006     return false;
19007 
19008   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19009 
19010   // We must have a base and an offset.
19011   if (!BasePtr.getBase().getNode())
19012     return false;
19013 
19014   // Do not handle stores to undef base pointers.
19015   if (BasePtr.getBase().isUndef())
19016     return false;
19017 
19018   // Directly improve a chain of disjoint stores starting at St.
19019   if (parallelizeChainedStores(St))
19020     return true;
19021 
19022   // Improve St's Chain..
19023   SDValue BetterChain = FindBetterChain(St, St->getChain());
19024   if (St->getChain() != BetterChain) {
19025     replaceStoreChain(St, BetterChain);
19026     return true;
19027   }
19028   return false;
19029 }
19030 
19031 /// This is the entry point for the file.
19032 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
19033                            CodeGenOpt::Level OptLevel) {
19034   /// This is the main entry point to this class.
19035   DAGCombiner(*this, AA, OptLevel).Run(Level);
19036 }
19037