1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 #include <vector>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
// Pass statistics, reported with -stats.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

// When set, the combiner may consult IR-level alias analysis for memory
// operations instead of relying purely on DAG chain structure.
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

// Allow TBAA metadata to participate in the combiner's aliasing queries
// (defaults to on).
static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
// Debug-only switch that restricts combiner alias analysis to the named
// function, for isolating miscompiles.
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

// Permit splitting the index arithmetic out of indexed loads (see
// SplitIndexingFromLoad). Defaults to on.
static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    /// The stage of the combine this instance is running at (initialized to
    /// BeforeLegalizeTypes in the constructor).
    CombineLevel Level;
    /// Codegen optimization level the combiner was created with.
    CodeGenOpt::Level OptLevel;
    bool LegalOperations = false;
    bool LegalTypes = false;
    /// True if the current function is optimized for size (queried from
    /// Function::optForSize() in the constructor).
    bool ForCodeSize;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

      // Cache the size in bits of the widest simple type that is legal to
      // store on this target.
      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    /// Size in bits of the widest legal store type; computed once in the
    /// constructor.
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt Demanded = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                           SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *N);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
                           EVT &ExtVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodeWithConsts,
                           ConstantSDNode *Mask, SDNode *&UncombinedNode);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary.  \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    /// candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return number of stores that were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      if (LegalOperations)
        return TLI.isOperationLegal(Opcode, VT);
      return TLI.isOperationLegalOrCustom(Opcode, VT);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };
610 
611 /// This class is a DAGUpdateListener that removes any deleted
612 /// nodes from the worklist.
613 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
614   DAGCombiner &DC;
615 
616 public:
617   explicit WorklistRemover(DAGCombiner &dc)
618     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
619 
620   void NodeDeleted(SDNode *N, SDNode *E) override {
621     DC.removeFromWorklist(N);
622   }
623 };
624 
625 } // end anonymous namespace
626 
627 //===----------------------------------------------------------------------===//
628 //  TargetLowering::DAGCombinerInfo implementation
629 //===----------------------------------------------------------------------===//
630 
631 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
632   ((DAGCombiner*)DC)->AddToWorklist(N);
633 }
634 
635 SDValue TargetLowering::DAGCombinerInfo::
636 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
637   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
638 }
639 
640 SDValue TargetLowering::DAGCombinerInfo::
641 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
642   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
643 }
644 
645 SDValue TargetLowering::DAGCombinerInfo::
646 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
647   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
648 }
649 
650 void TargetLowering::DAGCombinerInfo::
651 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
652   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
653 }
654 
655 //===----------------------------------------------------------------------===//
656 // Helper Functions
657 //===----------------------------------------------------------------------===//
658 
659 void DAGCombiner::deleteAndRecombine(SDNode *N) {
660   removeFromWorklist(N);
661 
662   // If the operands of this node are only used by the node, they will now be
663   // dead. Make sure to re-visit them and recursively delete dead nodes.
664   for (const SDValue &Op : N->ops())
665     // For an operand generating multiple values, one of the values may
666     // become dead allowing further simplification (e.g. split index
667     // arithmetic from an indexed load).
668     if (Op->hasOneUse() || Op->getNumValues() > 1)
669       AddToWorklist(Op.getNode());
670 
671   DAG.DeleteNode(N);
672 }
673 
674 /// Return 1 if we can compute the negated form of the specified expression for
675 /// the same cost as the expression itself, or 2 if we can compute the negated
676 /// form more cheaply than the expression itself.
677 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
678                                const TargetLowering &TLI,
679                                const TargetOptions *Options,
680                                unsigned Depth = 0) {
681   // fneg is removable even if it has multiple uses.
682   if (Op.getOpcode() == ISD::FNEG) return 2;
683 
684   // Don't allow anything with multiple uses unless we know it is free.
685   EVT VT = Op.getValueType();
686   const SDNodeFlags Flags = Op->getFlags();
687   if (!Op.hasOneUse())
688     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
689           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
690       return 0;
691 
692   // Don't recurse exponentially.
693   if (Depth > 6) return 0;
694 
695   switch (Op.getOpcode()) {
696   default: return false;
697   case ISD::ConstantFP: {
698     if (!LegalOperations)
699       return 1;
700 
701     // Don't invert constant FP values after legalization unless the target says
702     // the negated constant is legal.
703     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
704       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
705   }
706   case ISD::FADD:
707     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
708       return 0;
709 
710     // After operation legalization, it might not be legal to create new FSUBs.
711     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
712       return 0;
713 
714     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
715     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
716                                     Options, Depth + 1))
717       return V;
718     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
719     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
720                               Depth + 1);
721   case ISD::FSUB:
722     // We can't turn -(A-B) into B-A when we honor signed zeros.
723     if (!Options->NoSignedZerosFPMath &&
724         !Flags.hasNoSignedZeros())
725       return 0;
726 
727     // fold (fneg (fsub A, B)) -> (fsub B, A)
728     return 1;
729 
730   case ISD::FMUL:
731   case ISD::FDIV:
732     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
733     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
734                                     Options, Depth + 1))
735       return V;
736 
737     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
738                               Depth + 1);
739 
740   case ISD::FP_EXTEND:
741   case ISD::FP_ROUND:
742   case ISD::FSIN:
743     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
744                               Depth + 1);
745   }
746 }
747 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// This must handle exactly the set of opcodes isNegatibleForFree accepts, to
/// the same recursion depth, or the assert/llvm_unreachable below will fire.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Mirrors the depth cutoff in isNegatibleForFree.
  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    // Negate whichever operand isNegatibleForFree reports as free.
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes through these single-operand nodes.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND has a second operand; it is carried over unchanged while the
      // value operand is negated (presumably the rounding/trunc flag -- see
      // the ISD::FP_ROUND definition).
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
819 
820 // APInts must be the same size for most operations, this helper
821 // function zero extends the shorter of the pair so that they match.
822 // We provide an Offset so that we can create bitwidths that won't overflow.
823 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
824   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
825   LHS = LHS.zextOrSelf(Bits);
826   RHS = RHS.zextOrSelf(Bits);
827 }
828 
829 // Return true if this node is a setcc, or is a select_cc
830 // that selects between the target values used for true and false, making it
831 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
832 // the appropriate nodes based on the type of node we are checking. This
833 // simplifies life a bit for the callers.
834 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
835                                     SDValue &CC) const {
836   if (N.getOpcode() == ISD::SETCC) {
837     LHS = N.getOperand(0);
838     RHS = N.getOperand(1);
839     CC  = N.getOperand(2);
840     return true;
841   }
842 
843   if (N.getOpcode() != ISD::SELECT_CC ||
844       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
845       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
846     return false;
847 
848   if (TLI.getBooleanContents(N.getValueType()) ==
849       TargetLowering::UndefinedBooleanContent)
850     return false;
851 
852   LHS = N.getOperand(0);
853   RHS = N.getOperand(1);
854   CC  = N.getOperand(4);
855   return true;
856 }
857 
858 /// Return true if this is a SetCC-equivalent operation with only one use.
859 /// If this is true, it allows the users to invert the operation for free when
860 /// it is profitable to do so.
861 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
862   SDValue N0, N1, N2;
863   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
864     return true;
865   return false;
866 }
867 
868 static SDValue peekThroughBitcast(SDValue V) {
869   while (V.getOpcode() == ISD::BITCAST)
870     V = V.getOperand(0);
871   return V;
872 }
873 
874 // Returns the SDNode if it is a constant float BuildVector
875 // or constant float.
876 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
877   if (isa<ConstantFPSDNode>(N))
878     return N.getNode();
879   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
880     return N.getNode();
881   return nullptr;
882 }
883 
884 // Determines if it is a constant integer or a build vector of constant
885 // integers (and undefs).
886 // Do not permit build vector implicit truncation.
887 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
888   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
889     return !(Const->isOpaque() && NoOpaques);
890   if (N.getOpcode() != ISD::BUILD_VECTOR)
891     return false;
892   unsigned BitWidth = N.getScalarValueSizeInBits();
893   for (const SDValue &Op : N->op_values()) {
894     if (Op.isUndef())
895       continue;
896     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
897     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
898         (Const->isOpaque() && NoOpaques))
899       return false;
900   }
901   return true;
902 }
903 
904 // Determines if it is a constant null integer or a splatted vector of a
905 // constant null integer (with no undefs).
906 // Build vector implicit truncation is not an issue for null values.
907 static bool isNullConstantOrNullSplatConstant(SDValue N) {
908   // TODO: may want to use peekThroughBitcast() here.
909   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
910     return Splat->isNullValue();
911   return false;
912 }
913 
914 // Determines if it is a constant integer of one or a splatted vector of a
915 // constant integer of one (with no undefs).
916 // Do not permit build vector implicit truncation.
917 static bool isOneConstantOrOneSplatConstant(SDValue N) {
918   // TODO: may want to use peekThroughBitcast() here.
919   unsigned BitWidth = N.getScalarValueSizeInBits();
920   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
921     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
922   return false;
923 }
924 
925 // Determines if it is a constant integer of all ones or a splatted vector of a
926 // constant integer of all ones (with no undefs).
927 // Do not permit build vector implicit truncation.
928 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
929   N = peekThroughBitcast(N);
930   unsigned BitWidth = N.getScalarValueSizeInBits();
931   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
932     return Splat->isAllOnesValue() &&
933            Splat->getAPIntValue().getBitWidth() == BitWidth;
934   return false;
935 }
936 
937 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
938 // undef's.
939 static bool isAnyConstantBuildVector(const SDNode *N) {
940   return ISD::isBuildVectorOfConstantSDNodes(N) ||
941          ISD::isBuildVectorOfConstantFPSDNodes(N);
942 }
943 
/// Reassociate the commutative/associative operation Opc over N0 and N1: if
/// either operand is itself an Opc node with a constant operand, fold the two
/// constants together, or float the constant outward so a later combine can.
/// Returns the reassociated value, or a null SDValue if no fold applies. The
/// two halves below are mirror images for (op (op x, c1), N1) and
/// (op N0, (op x, c1)).
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both were constants but the fold failed; give up rather than
        // restructure without simplifying.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
989 
/// Replace all of N's result values with the NumTo values in To. If AddTo is
/// set, push the replacement nodes and their users onto the worklist. Deletes
/// N if it became dead, and returns SDValue(N, 0) as the conventional "node
/// was handled in place" sentinel for the combine loop.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Each replacement must match the type of the value it replaces; a null
  // entry means "leave that value alone".
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // NOTE(review): WorklistRemover presumably drops nodes deleted during the
  // RAUW below from the combiner worklist -- its definition is not visible in
  // this chunk.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1021 
/// Apply a replacement recorded by TargetLowering (TLO.Old -> TLO.New) to the
/// DAG and keep the combiner worklist consistent with it.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
1039 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
/// On success, TargetLowering has recorded a replacement in TLO which is then
/// committed to the DAG.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1060 
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true. Vector-element analogue of SimplifyDemandedBits above;
/// the structure deliberately mirrors it.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}
1083 
/// Replace all uses of Load with the (wider) ExtLoad: the value result is
/// replaced by a truncate of ExtLoad back to Load's original type, the chain
/// result by ExtLoad's chain, and the old load is then deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  // Truncate back so existing users keep seeing the original value type.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
1097 
/// Produce a PVT-typed equivalent of Op, or a null SDValue if that is not
/// possible. Sets Replace when the caller must later retire the original load
/// in favor of the new extending load (see ReplaceLoadWithPromotedLoad).
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A plain load becomes an any-extending load; an already-extending load
    // keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Re-assert the sign-extended range on the promoted value.
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // NOTE(review): byte-sized constants are sign-extended while odd-sized
    // (e.g. i1) ones are zero-extended -- presumably to keep boolean-like
    // values canonical; confirm the intent before changing.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Fallback: any-extend arbitrary values when the target supports it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
1134 
/// Promote Op to PVT and force the high bits to be copies of the original
/// sign bit via SIGN_EXTEND_INREG on the old type. Returns null on failure.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // Promotion turned a load into an extending load; retire the old one.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
1151 
/// Promote Op to PVT and force the high bits to zero by masking back down to
/// the original type's width. Zero-extend counterpart of SExtPromoteOperand.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // Promotion turned a load into an extending load; retire the old one.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
1165 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer. The result is computed in the wider
/// type and truncated back, so callers see the original value type.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integers are promoted this way.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // NOTE(review): NN0/NN1 are used below without a null check, so
    // PromoteOperand is presumably guaranteed to succeed for operands of ops
    // the target asked to promote -- confirm before relying on it elsewhere.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Compute in the promoted type, then truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    // Returning Op (not RV) signals the combine loop that the node was
    // replaced in place via CombineTo.
    return Op;
  }
  return SDValue();
}
1230 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer. Only the shifted value is promoted;
/// the shift amount operand is used as-is.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // The promoted high bits must match what the shift pulls down: SRA needs
    // the sign bit replicated, SRL needs zeros, SHL doesn't care.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift in the promoted type, then truncate back to VT.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    // NOTE(review): the load replacement above can apparently recursively
    // delete Op; only hand back RV if Op is still alive.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1283 
/// Promote an extension node (aext/zext/sext) to the target-preferred wider
/// type by simply rebuilding the same extension at that type; the kind of
/// extension is preserved, so no extra fix-up is needed.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}
1311 
/// Promote an unindexed load to the target-preferred wider type: rebuild it as
/// an extending load and replace all users with a truncate back to the
/// original type. Returns true if the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Mirror PromoteOperand's load handling: plain loads become any-extending
    // loads, extending loads keep their kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Replace the value result with the truncate and the chain result with
    // the new load's chain, then delete the old load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1357 
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // Worklist of candidates; operands of each deleted node are queued so the
  // deletion cascades through chains that become dead.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    // Defensive null check; operand nodes are not expected to be null here.
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still alive, but it just lost a user: revisit it, since fewer uses
      // can enable new combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
1387 
1388 //===----------------------------------------------------------------------===//
1389 //  Main DAG Combiner implementation
1390 //===----------------------------------------------------------------------===//
1391 
/// Main driver: seed the worklist with every node in the DAG, then repeatedly
/// pop a node, try to combine it, and propagate replacements until the
/// worklist is exhausted.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null
    // entries (removal leaves a null rather than compacting the vector).
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // Legalization replaced N entirely; nothing left to combine here.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Same value count: replace every result; otherwise the combine must have
    // produced a single-value replacement for a single-value node.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1495 
/// Dispatch to the opcode-specific visitXXX combine routine for \p N.
/// Returns the replacement value on success, or an empty SDValue when the
/// opcode has no handler (the default case falls through the switch).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1601 
1602 SDValue DAGCombiner::combine(SDNode *N) {
1603   SDValue RV = visit(N);
1604 
1605   // If nothing happened, try a target-specific DAG combine.
1606   if (!RV.getNode()) {
1607     assert(N->getOpcode() != ISD::DELETED_NODE &&
1608            "Node was deleted but visit returned NULL!");
1609 
1610     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1611         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1612 
1613       // Expose the DAG combiner to the target combiner impls.
1614       TargetLowering::DAGCombinerInfo
1615         DagCombineInfo(DAG, Level, false, this);
1616 
1617       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1618     }
1619   }
1620 
1621   // If nothing happened still, try promoting the operation.
1622   if (!RV.getNode()) {
1623     switch (N->getOpcode()) {
1624     default: break;
1625     case ISD::ADD:
1626     case ISD::SUB:
1627     case ISD::MUL:
1628     case ISD::AND:
1629     case ISD::OR:
1630     case ISD::XOR:
1631       RV = PromoteIntBinOp(SDValue(N, 0));
1632       break;
1633     case ISD::SHL:
1634     case ISD::SRA:
1635     case ISD::SRL:
1636       RV = PromoteIntShiftOp(SDValue(N, 0));
1637       break;
1638     case ISD::SIGN_EXTEND:
1639     case ISD::ZERO_EXTEND:
1640     case ISD::ANY_EXTEND:
1641       RV = PromoteExtend(SDValue(N, 0));
1642       break;
1643     case ISD::LOAD:
1644       if (PromoteLoad(SDValue(N, 0)))
1645         RV = SDValue(N, 0);
1646       break;
1647     }
1648   }
1649 
1650   // If N is a commutative binary node, try eliminate it if the commuted
1651   // version is already present in the DAG.
1652   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1653       N->getNumValues() == 1) {
1654     SDValue N0 = N->getOperand(0);
1655     SDValue N1 = N->getOperand(1);
1656 
1657     // Constant operands are canonicalized to RHS.
1658     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1659       SDValue Ops[] = {N1, N0};
1660       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1661                                             N->getFlags());
1662       if (CSENode)
1663         return SDValue(CSENode, 0);
1664     }
1665   }
1666 
1667   return RV;
1668 }
1669 
1670 /// Given a node, return its input chain if it has one, otherwise return a null
1671 /// sd operand.
1672 static SDValue getInputChainForNode(SDNode *N) {
1673   if (unsigned NumOps = N->getNumOperands()) {
1674     if (N->getOperand(0).getValueType() == MVT::Other)
1675       return N->getOperand(0);
1676     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1677       return N->getOperand(NumOps-1);
1678     for (unsigned i = 1; i < NumOps-1; ++i)
1679       if (N->getOperand(i).getValueType() == MVT::Other)
1680         return N->getOperand(i);
1681   }
1682   return SDValue();
1683 }
1684 
/// Combine a TokenFactor node: flatten nested token factors, drop redundant
/// operands (entry tokens, duplicates, and chains already reachable from
/// another operand), and rebuild a smaller TokenFactor if anything changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        // Single-use nested token factors are flattened into this one.
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this lambda intentionally shadows DAGCombiner::AddToWorklist for
  // the rest of the function; it pushes onto the local chain-search worklist.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Cap the search at 1024 worklist entries to bound compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for any pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // Continue the search through the chain operand (operand 0).
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory operations continue the search through their chain operand.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only operands whose chains were never reached from another
        // operand's search.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1846 
1847 /// MERGE_VALUES can always be eliminated.
1848 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1849   WorklistRemover DeadNodes(*this);
1850   // Replacing results may cause a different MERGE_VALUES to suddenly
1851   // be CSE'd with N, and carry its uses with it. Iterate until no
1852   // uses remain, to ensure that the node can be safely deleted.
1853   // First add the users of this node to the work list so that they
1854   // can be tried again once they have new operands.
1855   AddUsersToWorklist(N);
1856   do {
1857     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1858       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1859   } while (!N->use_empty());
1860   deleteAndRecombine(N);
1861   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1862 }
1863 
1864 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1865 /// ConstantSDNode pointer else nullptr.
1866 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1867   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1868   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1869 }
1870 
/// Try to fold a binary operator with a constant operand into a
/// select-of-constants operand, eliminating the binop:
///   binop (select Cond, CT, CF), CBO --> select Cond, binop(CT,CBO), binop(CF,CBO)
/// Returns the new select, or an empty SDValue if the fold does not apply.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  auto BinOpcode = BO->getOpcode();
  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
          BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
          BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
          BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
          BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
          BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
          BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
          BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
          BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    // Operand 0 is not a single-use select; try operand 1.
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be constants (or constant vectors).
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
                         (isNullConstantOrNullSplatConstant(CT) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
                         (isNullConstantOrNullSplatConstant(CF) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CF));

  // CBO is the binop operand that is not the select.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  // Preserve operand order when the select was the second binop operand.
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  // The folded arm must itself be a constant (or undef) unless the special
  // and/or case above permits a non-constant operand.
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}
1955 
1956 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1957   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1958          "Expecting add or sub");
1959 
1960   // Match a constant operand and a zext operand for the math instruction:
1961   // add Z, C
1962   // sub C, Z
1963   bool IsAdd = N->getOpcode() == ISD::ADD;
1964   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1965   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1966   auto *CN = dyn_cast<ConstantSDNode>(C);
1967   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1968     return SDValue();
1969 
1970   // Match the zext operand as a setcc of a boolean.
1971   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1972       Z.getOperand(0).getValueType() != MVT::i1)
1973     return SDValue();
1974 
1975   // Match the compare as: setcc (X & 1), 0, eq.
1976   SDValue SetCC = Z.getOperand(0);
1977   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1978   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1979       SetCC.getOperand(0).getOpcode() != ISD::AND ||
1980       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1981     return SDValue();
1982 
1983   // We are adding/subtracting a constant and an inverted low bit. Turn that
1984   // into a subtract/add of the low bit with incremented/decremented constant:
1985   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1986   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1987   EVT VT = C.getValueType();
1988   SDLoc DL(N);
1989   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1990   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1991                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1992   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1993 }
1994 
/// Combine an ISD::ADD node. Applies constant folding, canonicalization, and
/// a sequence of algebraic folds; the order of the folds below is significant
/// (earlier, simpler folds take precedence). Returns the replacement value or
/// an empty SDValue if nothing applied.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  // Try the remaining add-like folds with both operand orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2150 
2151 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2152   bool Masked = false;
2153 
2154   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2155   while (true) {
2156     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2157       V = V.getOperand(0);
2158       continue;
2159     }
2160 
2161     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2162       Masked = true;
2163       V = V.getOperand(0);
2164       continue;
2165     }
2166 
2167     break;
2168   }
2169 
2170   // If this is not a carry, return.
2171   if (V.getResNo() != 1)
2172     return SDValue();
2173 
2174   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2175       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2176     return SDValue();
2177 
2178   // If the result is masked, then no matter what kind of bool it is we can
2179   // return. If it isn't, then we need to make sure the bool type is either 0 or
2180   // 1 and not other values.
2181   if (Masked ||
2182       TLI.getBooleanContents(V.getValueType()) ==
2183           TargetLoweringBase::ZeroOrOneBooleanContent)
2184     return V;
2185 
2186   return SDValue();
2187 }
2188 
/// Try add folds that are not symmetric in N0/N1; visitADD calls this with
/// both operand orders. \p LocReference supplies the debug location for any
/// new nodes. Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is known all-ones or all-zeros.
    if (NumSignBits == DestBits &&
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2246 
2247 SDValue DAGCombiner::visitADDC(SDNode *N) {
2248   SDValue N0 = N->getOperand(0);
2249   SDValue N1 = N->getOperand(1);
2250   EVT VT = N0.getValueType();
2251   SDLoc DL(N);
2252 
2253   // If the flag result is dead, turn this into an ADD.
2254   if (!N->hasAnyUseOfValue(1))
2255     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2256                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2257 
2258   // canonicalize constant to RHS.
2259   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2260   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2261   if (N0C && !N1C)
2262     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2263 
2264   // fold (addc x, 0) -> x + no carry out
2265   if (isNullConstant(N1))
2266     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2267                                         DL, MVT::Glue));
2268 
2269   // If it cannot overflow, transform into an add.
2270   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2271     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2272                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2273 
2274   return SDValue();
2275 }
2276 
2277 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2278                            SelectionDAG &DAG, const TargetLowering &TLI) {
2279   SDValue Cst;
2280   switch (TLI.getBooleanContents(VT)) {
2281   case TargetLowering::ZeroOrOneBooleanContent:
2282   case TargetLowering::UndefinedBooleanContent:
2283     Cst = DAG.getConstant(1, DL, VT);
2284     break;
2285   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2286     Cst = DAG.getConstant(-1, DL, VT);
2287     break;
2288   }
2289 
2290   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2291 }
2292 
2293 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2294   if (V.getOpcode() != ISD::XOR) return false;
2295   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2296   if (!Const) return false;
2297 
2298   switch(TLI.getBooleanContents(VT)) {
2299     case TargetLowering::ZeroOrOneBooleanContent:
2300       return Const->isOne();
2301     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2302       return Const->isAllOnesValue();
2303     case TargetLowering::UndefinedBooleanContent:
2304       return (Const->getAPIntValue() & 0x01) == 1;
2305   }
2306   llvm_unreachable("Unsupported boolean content");
2307 }
2308 
SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  // Vector UADDO folds are not implemented here.
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

  // fold (uaddo x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
  // (~a) + 1 is the two's complement negation 0 - a; the USUBO flag is
  // the inverse of the UADDO flag here, hence the flipBoolean on result 1.
  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                              DAG.getConstant(0, DL, VT),
                              N0.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Try the addcarry-forming folds with the operands in both orders.
  if (SDValue Combined = visitUADDOLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitUADDOLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2356 
2357 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2358   auto VT = N0.getValueType();
2359 
2360   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2361   // If Y + 1 cannot overflow.
2362   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2363     SDValue Y = N1.getOperand(0);
2364     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2365     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2366       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2367                          N1.getOperand(2));
2368   }
2369 
2370   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2371   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2372     if (SDValue Carry = getAsCarry(TLI, N1))
2373       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2374                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2375 
2376   return SDValue();
2377 }
2378 
2379 SDValue DAGCombiner::visitADDE(SDNode *N) {
2380   SDValue N0 = N->getOperand(0);
2381   SDValue N1 = N->getOperand(1);
2382   SDValue CarryIn = N->getOperand(2);
2383 
2384   // canonicalize constant to RHS
2385   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2386   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2387   if (N0C && !N1C)
2388     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2389                        N1, N0, CarryIn);
2390 
2391   // fold (adde x, y, false) -> (addc x, y)
2392   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2393     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2394 
2395   return SDValue();
2396 }
2397 
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  EVT CarryVT = CarryIn.getValueType();

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  // The sum is just the incoming carry bit widened to VT; mask to bit 0
  // since the boolean extension may carry extra bits.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
  // ~a plus the inverted carry equals (0 - a) - b with the outgoing
  // carry/borrow inverted, hence the flipBoolean on result 1.
  if (isBitwiseNot(N0) && isNullConstant(N1) &&
      isBooleanFlip(CarryIn, CarryVT, TLI)) {
    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
                              DAG.getConstant(0, DL, N0.getValueType()),
                              N0.getOperand(0), CarryIn.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Try the symmetric folds with the operands in both orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2447 
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    // Match the diamond: Y is the carry of a (uaddo A, B) whose sum feeds
    // an (addcarry Sum, 0, Z); CarryIn is that addcarry's carry result.
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      // Rebuild the inner add as (addcarry A, B, Z) so the carries chain
      // linearly, then feed its carry into the outer addcarry.
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}
2495 
2496 // Since it may not be valid to emit a fold to zero for vector initializers
2497 // check if we can before folding.
2498 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2499                              SelectionDAG &DAG, bool LegalOperations,
2500                              bool LegalTypes) {
2501   if (!VT.isVector())
2502     return DAG.getConstant(0, DL, VT);
2503   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2504     return DAG.getConstant(0, DL, VT);
2505   return SDValue();
2506 }
2507 
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Opaque constants are deliberately excluded below; they must not be
  // negated/folded.
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      // NOTE(review): N1C appears to always be null here — a non-opaque
      // constant N1 already returned via the (sub x, c) -> (add x, -c) fold
      // above — so this fold looks unreachable; verify before relying on it.
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
2672 
2673 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2674   SDValue N0 = N->getOperand(0);
2675   SDValue N1 = N->getOperand(1);
2676   EVT VT = N0.getValueType();
2677   SDLoc DL(N);
2678 
2679   // If the flag result is dead, turn this into an SUB.
2680   if (!N->hasAnyUseOfValue(1))
2681     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2682                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2683 
2684   // fold (subc x, x) -> 0 + no borrow
2685   if (N0 == N1)
2686     return CombineTo(N, DAG.getConstant(0, DL, VT),
2687                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2688 
2689   // fold (subc x, 0) -> x + no borrow
2690   if (isNullConstant(N1))
2691     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2692 
2693   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2694   if (isAllOnesConstant(N0))
2695     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2696                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2697 
2698   return SDValue();
2699 }
2700 
2701 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2702   SDValue N0 = N->getOperand(0);
2703   SDValue N1 = N->getOperand(1);
2704   EVT VT = N0.getValueType();
2705   if (VT.isVector())
2706     return SDValue();
2707 
2708   EVT CarryVT = N->getValueType(1);
2709   SDLoc DL(N);
2710 
2711   // If the flag result is dead, turn this into an SUB.
2712   if (!N->hasAnyUseOfValue(1))
2713     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2714                      DAG.getUNDEF(CarryVT));
2715 
2716   // fold (usubo x, x) -> 0 + no borrow
2717   if (N0 == N1)
2718     return CombineTo(N, DAG.getConstant(0, DL, VT),
2719                      DAG.getConstant(0, DL, CarryVT));
2720 
2721   // fold (usubo x, 0) -> x + no borrow
2722   if (isNullConstant(N1))
2723     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2724 
2725   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2726   if (isAllOnesConstant(N0))
2727     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2728                      DAG.getConstant(0, DL, CarryVT));
2729 
2730   return SDValue();
2731 }
2732 
2733 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2734   SDValue N0 = N->getOperand(0);
2735   SDValue N1 = N->getOperand(1);
2736   SDValue CarryIn = N->getOperand(2);
2737 
2738   // fold (sube x, y, false) -> (subc x, y)
2739   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2740     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2741 
2742   return SDValue();
2743 }
2744 
2745 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2746   SDValue N0 = N->getOperand(0);
2747   SDValue N1 = N->getOperand(1);
2748   SDValue CarryIn = N->getOperand(2);
2749 
2750   // fold (subcarry x, y, false) -> (usubo x, y)
2751   if (isNullConstant(CarryIn)) {
2752     if (!LegalOperations ||
2753         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2754       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2755   }
2756 
2757   return SDValue();
2758 }
2759 
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, "constant" here means a splat of one element value.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The shift amount must be converted to the target's shift-amount type.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  // Only apply when the shifted constant (c2 << c1) itself folds to a
  // constant.
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}
2899 
2900 /// Return true if divmod libcall is available.
2901 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2902                                      const TargetLowering &TLI) {
2903   RTLIB::Libcall LC;
2904   EVT NodeType = Node->getValueType(0);
2905   if (!NodeType.isSimple())
2906     return false;
2907   switch (NodeType.getSimpleVT().SimpleTy) {
2908   default: return false; // No libcall for vector types.
2909   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2910   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2911   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2912   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2913   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2914   }
2915 
2916   return TLI.getLibcallName(LC) != nullptr;
2917 }
2918 
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Scan every user of the dividend for a div/rem/divrem over the same
  // operands, and rewrite each matching one to use a single shared DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Skip the node itself, deleted nodes, and dead users.
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // First match of the complementary op: create the DIVREM node.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // An existing DIVREM over the same operands is reused directly.
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // DIVREM result 0 feeds div users, result 1 feeds rem users.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2989 
2990 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
2991   SDValue N0 = N->getOperand(0);
2992   SDValue N1 = N->getOperand(1);
2993   EVT VT = N->getValueType(0);
2994   SDLoc DL(N);
2995 
2996   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2997     return DAG.getUNDEF(VT);
2998 
2999   // undef / X -> 0
3000   // undef % X -> 0
3001   if (N0.isUndef())
3002     return DAG.getConstant(0, DL, VT);
3003 
3004   return SDValue();
3005 }
3006 
3007 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3008   SDValue N0 = N->getOperand(0);
3009   SDValue N1 = N->getOperand(1);
3010   EVT VT = N->getValueType(0);
3011   unsigned BitWidth = VT.getScalarSizeInBits();
3012 
3013   // fold vector ops
3014   if (VT.isVector())
3015     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3016       return FoldedVOp;
3017 
3018   SDLoc DL(N);
3019 
3020   // fold (sdiv c1, c2) -> c1/c2
3021   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3022   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3023   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3024     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3025   // fold (sdiv X, 1) -> X
3026   if (N1C && N1C->isOne())
3027     return N0;
3028   // fold (sdiv X, -1) -> 0-X
3029   if (N1C && N1C->isAllOnesValue())
3030     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3031   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3032   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3033     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, VT, N0, N1, ISD::SETEQ),
3034                          DAG.getConstant(1, DL, VT),
3035                          DAG.getConstant(0, DL, VT));
3036 
3037   if (SDValue V = simplifyDivRem(N, DAG))
3038     return V;
3039 
3040   if (SDValue NewSel = foldBinOpIntoSelect(N))
3041     return NewSel;
3042 
3043   // If we know the sign bits of both operands are zero, strength reduce to a
3044   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3045   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3046     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3047 
3048   // Helper for determining whether a value is a power-2 constant scalar or a
3049   // vector of such elements.
3050   SmallBitVector KnownNegatives(
3051       (N1C || !VT.isVector()) ? 1 : VT.getVectorNumElements(), false);
3052   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3053     if (C->isNullValue() || C->isOpaque())
3054       return false;
3055     if (C->getAPIntValue().isAllOnesValue())
3056       return false;
3057     if (C->getAPIntValue().isMinSignedValue())
3058       return false;
3059 
3060     if (C->getAPIntValue().isPowerOf2())
3061       return true;
3062     if ((-C->getAPIntValue()).isPowerOf2())
3063       return true;
3064     return false;
3065   };
3066 
3067   // fold (sdiv X, pow2) -> simple ops after legalize
3068   // FIXME: We check for the exact bit here because the generic lowering gives
3069   // better results in that case. The target-specific lowering should learn how
3070   // to handle exact sdivs efficiently.
3071   if (!N->getFlags().hasExact() &&
3072       ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
3073     // Target-specific implementation of sdiv x, pow2.
3074     if (SDValue Res = BuildSDIVPow2(N))
3075       return Res;
3076 
3077     // Create constants that are functions of the shift amount value.
3078     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3079     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3080     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3081     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3082     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3083     if (!isConstantOrConstantVector(Inexact))
3084       return SDValue();
3085 
3086     // Splat the sign bit into the register
3087     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3088                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3089     AddToWorklist(Sign.getNode());
3090 
3091     // Add (N0 < 0) ? abs2 - 1 : 0;
3092     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3093     AddToWorklist(Srl.getNode());
3094     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3095     AddToWorklist(Add.getNode());
3096     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3097     AddToWorklist(Sra.getNode());
3098 
3099     // If dividing by a positive value, we're done. Otherwise, the result must
3100     // be negated.
3101     SDValue Zero = DAG.getConstant(0, DL, VT);
3102     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3103 
3104     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3105     SDValue Res = DAG.getSelect(
3106         DL, VT, DAG.getSetCC(DL, VT, N1, Zero, ISD::SETLT), Sub, Sra);
3107     // Special case: (sdiv X, 1) -> X
3108     SDValue One = DAG.getConstant(1, DL, VT);
3109     Res = DAG.getSelect(DL, VT, DAG.getSetCC(DL, VT, N1, One, ISD::SETEQ), N0,
3110                         Res);
3111     return Res;
3112   }
3113 
3114   // If integer divide is expensive and we satisfy the requirements, emit an
3115   // alternate sequence.  Targets may check function attributes for size/speed
3116   // trade-offs.
3117   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3118   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
3119     if (SDValue Op = BuildSDIV(N))
3120       return Op;
3121 
3122   // sdiv, srem -> sdivrem
3123   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3124   // true.  Otherwise, we break the simplification logic in visitREM().
3125   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3126     if (SDValue DivRem = useDivRem(N))
3127         return DivRem;
3128 
3129   return SDValue();
3130 }
3131 
/// Combine an ISD::UDIV node: constant folding, power-of-two shifts,
/// BuildUDIV magic-number expansion, and UDIVREM formation.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    // The log may be wider or narrower than the shift-amount type; adjust it.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      // Build log2(c)+y in the type of the SHL's shift amount.
      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  return SDValue();
}
3202 
/// Handles both ISD::SREM and ISD::UREM. Folds constants, strength-reduces
/// SREM to UREM / UREM-by-pow2 to AND, rewrites X%C as X-(X/C)*C when the
/// division-by-constant logic applies, and finally tries DIVREM formation.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    // Speculatively build the DIV and run the combiner on it; only use the
    // result if it actually simplified into something other than a DIVREM.
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode() &&
        OptimizedDiv.getOpcode() != ISD::UDIVREM &&
        OptimizedDiv.getOpcode() != ISD::SDIVREM) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}
3278 
3279 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3280   SDValue N0 = N->getOperand(0);
3281   SDValue N1 = N->getOperand(1);
3282   EVT VT = N->getValueType(0);
3283   SDLoc DL(N);
3284 
3285   if (VT.isVector()) {
3286     // fold (mulhs x, 0) -> 0
3287     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3288       return N1;
3289     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3290       return N0;
3291   }
3292 
3293   // fold (mulhs x, 0) -> 0
3294   if (isNullConstant(N1))
3295     return N1;
3296   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3297   if (isOneConstant(N1))
3298     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3299                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3300                                        getShiftAmountTy(N0.getValueType())));
3301 
3302   // fold (mulhs x, undef) -> 0
3303   if (N0.isUndef() || N1.isUndef())
3304     return DAG.getConstant(0, DL, VT);
3305 
3306   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3307   // plus a shift.
3308   if (VT.isSimple() && !VT.isVector()) {
3309     MVT Simple = VT.getSimpleVT();
3310     unsigned SimpleSize = Simple.getSizeInBits();
3311     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3312     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3313       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3314       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3315       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3316       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3317             DAG.getConstant(SimpleSize, DL,
3318                             getShiftAmountTy(N1.getValueType())));
3319       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3320     }
3321   }
3322 
3323   return SDValue();
3324 }
3325 
3326 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3327   SDValue N0 = N->getOperand(0);
3328   SDValue N1 = N->getOperand(1);
3329   EVT VT = N->getValueType(0);
3330   SDLoc DL(N);
3331 
3332   if (VT.isVector()) {
3333     // fold (mulhu x, 0) -> 0
3334     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3335       return N1;
3336     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3337       return N0;
3338   }
3339 
3340   // fold (mulhu x, 0) -> 0
3341   if (isNullConstant(N1))
3342     return N1;
3343   // fold (mulhu x, 1) -> 0
3344   if (isOneConstant(N1))
3345     return DAG.getConstant(0, DL, N0.getValueType());
3346   // fold (mulhu x, undef) -> 0
3347   if (N0.isUndef() || N1.isUndef())
3348     return DAG.getConstant(0, DL, VT);
3349 
3350   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3351   // plus a shift.
3352   if (VT.isSimple() && !VT.isVector()) {
3353     MVT Simple = VT.getSimpleVT();
3354     unsigned SimpleSize = Simple.getSizeInBits();
3355     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3356     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3357       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3358       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3359       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3360       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3361             DAG.getConstant(SimpleSize, DL,
3362                             getShiftAmountTy(N1.getValueType())));
3363       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3364     }
3365   }
3366 
3367   return SDValue();
3368 }
3369 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    // Both results are replaced with the single-result node; the dead high
    // value simply has no users.
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): this uses isOperationLegal while the low-half case above
  // accepts LegalOrCustom — looks asymmetric; confirm whether intentional.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the single-result node and see if combine() can
    // simplify it into something different.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3420 
3421 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3422   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3423     return Res;
3424 
3425   EVT VT = N->getValueType(0);
3426   SDLoc DL(N);
3427 
3428   // If the type is twice as wide is legal, transform the mulhu to a wider
3429   // multiply plus a shift.
3430   if (VT.isSimple() && !VT.isVector()) {
3431     MVT Simple = VT.getSimpleVT();
3432     unsigned SimpleSize = Simple.getSizeInBits();
3433     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3434     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3435       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3436       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3437       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3438       // Compute the high part as N1.
3439       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3440             DAG.getConstant(SimpleSize, DL,
3441                             getShiftAmountTy(Lo.getValueType())));
3442       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3443       // Compute the low part as N0.
3444       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3445       return CombineTo(N, Lo, Hi);
3446     }
3447   }
3448 
3449   return SDValue();
3450 }
3451 
3452 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3453   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3454     return Res;
3455 
3456   EVT VT = N->getValueType(0);
3457   SDLoc DL(N);
3458 
3459   // If the type is twice as wide is legal, transform the mulhu to a wider
3460   // multiply plus a shift.
3461   if (VT.isSimple() && !VT.isVector()) {
3462     MVT Simple = VT.getSimpleVT();
3463     unsigned SimpleSize = Simple.getSizeInBits();
3464     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3465     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3466       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3467       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3468       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3469       // Compute the high part as N1.
3470       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3471             DAG.getConstant(SimpleSize, DL,
3472                             getShiftAmountTy(Lo.getValueType())));
3473       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3474       // Compute the low part as N0.
3475       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3476       return CombineTo(N, Lo, Hi);
3477     }
3478   }
3479 
3480   return SDValue();
3481 }
3482 
3483 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3484   // (smulo x, 2) -> (saddo x, x)
3485   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3486     if (C2->getAPIntValue() == 2)
3487       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3488                          N->getOperand(0), N->getOperand(0));
3489 
3490   return SDValue();
3491 }
3492 
3493 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3494   // (umulo x, 2) -> (uaddo x, x)
3495   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3496     if (C2->getAPIntValue() == 2)
3497       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3498                          N->getOperand(0), N->getOperand(0));
3499 
3500   return SDValue();
3501 }
3502 
/// Combine integer min/max nodes (SMIN/SMAX/UMIN/UMAX).
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX: signed and
  // unsigned orderings agree when both operands are non-negative.
  // Only do this if the current op isn't legal and the flipped is.
  unsigned Opcode = N->getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}
3545 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it by hoisting the shared inner operation (extend, shift, bitcast,
/// or shuffle) outside the logic op.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    // N1 has the same opcode as N0 (asserted above), so operand 0 exists.
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    // Casting N1 is safe: the opcode assert above guarantees it is also a
    // VECTOR_SHUFFLE.
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
3698 
3699 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3700 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3701                                        const SDLoc &DL) {
3702   SDValue LL, LR, RL, RR, N0CC, N1CC;
3703   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3704       !isSetCCEquivalent(N1, RL, RR, N1CC))
3705     return SDValue();
3706 
3707   assert(N0.getValueType() == N1.getValueType() &&
3708          "Unexpected operand types for bitwise logic op");
3709   assert(LL.getValueType() == LR.getValueType() &&
3710          RL.getValueType() == RR.getValueType() &&
3711          "Unexpected operand types for setcc");
3712 
3713   // If we're here post-legalization or the logic op type is not i1, the logic
3714   // op type must match a setcc result type. Also, all folds require new
3715   // operations on the left and right operands, so those types must match.
3716   EVT VT = N0.getValueType();
3717   EVT OpVT = LL.getValueType();
3718   if (LegalOperations || VT.getScalarType() != MVT::i1)
3719     if (VT != getSetCCResultType(OpVT))
3720       return SDValue();
3721   if (OpVT != RL.getValueType())
3722     return SDValue();
3723 
3724   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3725   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3726   bool IsInteger = OpVT.isInteger();
3727   if (LR == RR && CC0 == CC1 && IsInteger) {
3728     bool IsZero = isNullConstantOrNullSplatConstant(LR);
3729     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3730 
3731     // All bits clear?
3732     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3733     // All sign bits clear?
3734     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3735     // Any bits set?
3736     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3737     // Any sign bits set?
3738     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3739 
3740     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
3741     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3742     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
3743     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
3744     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3745       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3746       AddToWorklist(Or.getNode());
3747       return DAG.getSetCC(DL, VT, Or, LR, CC1);
3748     }
3749 
3750     // All bits set?
3751     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3752     // All sign bits set?
3753     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3754     // Any bits clear?
3755     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3756     // Any sign bits clear?
3757     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3758 
3759     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3760     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
3761     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3762     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
3763     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3764       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3765       AddToWorklist(And.getNode());
3766       return DAG.getSetCC(DL, VT, And, LR, CC1);
3767     }
3768   }
3769 
3770   // TODO: What is the 'or' equivalent of this fold?
3771   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3772   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3773       IsInteger && CC0 == ISD::SETNE &&
3774       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3775        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3776     SDValue One = DAG.getConstant(1, DL, OpVT);
3777     SDValue Two = DAG.getConstant(2, DL, OpVT);
3778     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3779     AddToWorklist(Add.getNode());
3780     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3781   }
3782 
3783   // Try more general transforms if the predicates match and the only user of
3784   // the compares is the 'and' or 'or'.
3785   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3786       N0.hasOneUse() && N1.hasOneUse()) {
3787     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3788     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3789     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3790       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3791       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3792       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3793       SDValue Zero = DAG.getConstant(0, DL, OpVT);
3794       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3795     }
3796   }
3797 
3798   // Canonicalize equivalent operands to LL == RL.
3799   if (LL == RR && LR == RL) {
3800     CC1 = ISD::getSetCCSwappedOperands(CC1);
3801     std::swap(RL, RR);
3802   }
3803 
3804   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3805   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3806   if (LL == RL && LR == RR) {
3807     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3808                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3809     if (NewCC != ISD::SETCC_INVALID &&
3810         (!LegalOperations ||
3811          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3812           TLI.isOperationLegal(ISD::SETCC, OpVT))))
3813       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3814   }
3815 
3816   return SDValue();
3817 }
3818 
3819 /// This contains all DAGCombine rules which reduce two values combined by
3820 /// an And operation to a single value. This makes them reusable in the context
3821 /// of visitSELECT(). Rules involving constants are not included as
3822 /// visitSELECT() already handles those cases.
3823 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3824   EVT VT = N1.getValueType();
3825   SDLoc DL(N);
3826 
3827   // fold (and x, undef) -> 0
3828   if (N0.isUndef() || N1.isUndef())
3829     return DAG.getConstant(0, DL, VT);
3830 
3831   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3832     return V;
3833 
3834   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3835       VT.getSizeInBits() <= 64) {
3836     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3837       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3838         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3839         // immediate for an add, but it is legal if its top c2 bits are set,
3840         // transform the ADD so the immediate doesn't need to be materialized
3841         // in a register.
3842         APInt ADDC = ADDI->getAPIntValue();
3843         APInt SRLC = SRLI->getAPIntValue();
3844         if (ADDC.getMinSignedBits() <= 64 &&
3845             SRLC.ult(VT.getSizeInBits()) &&
3846             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3847           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3848                                              SRLC.getZExtValue());
3849           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3850             ADDC |= Mask;
3851             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3852               SDLoc DL0(N0);
3853               SDValue NewAdd =
3854                 DAG.getNode(ISD::ADD, DL0, VT,
3855                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3856               CombineTo(N0.getNode(), NewAdd);
3857               // Return N so it doesn't get rechecked!
3858               return SDValue(N, 0);
3859             }
3860           }
3861         }
3862       }
3863     }
3864   }
3865 
3866   // Reduce bit extract of low half of an integer to the narrower type.
3867   // (and (srl i64:x, K), KMask) ->
3868   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3869   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3870     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3871       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3872         unsigned Size = VT.getSizeInBits();
3873         const APInt &AndMask = CAnd->getAPIntValue();
3874         unsigned ShiftBits = CShift->getZExtValue();
3875 
3876         // Bail out, this node will probably disappear anyway.
3877         if (ShiftBits == 0)
3878           return SDValue();
3879 
3880         unsigned MaskBits = AndMask.countTrailingOnes();
3881         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3882 
3883         if (AndMask.isMask() &&
3884             // Required bits must not span the two halves of the integer and
3885             // must fit in the half size type.
3886             (ShiftBits + MaskBits <= Size / 2) &&
3887             TLI.isNarrowingProfitable(VT, HalfVT) &&
3888             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3889             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3890             TLI.isTruncateFree(VT, HalfVT) &&
3891             TLI.isZExtFree(HalfVT, VT)) {
3892           // The isNarrowingProfitable is to avoid regressions on PPC and
3893           // AArch64 which match a few 64-bit bit insert / bit extract patterns
3894           // on downstream users of this. Those patterns could probably be
3895           // extended to handle extensions mixed in.
3896 
3897           SDValue SL(N0);
3898           assert(MaskBits <= Size);
3899 
3900           // Extracting the highest bit of the low half.
3901           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3902           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3903                                       N0.getOperand(0));
3904 
3905           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3906           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3907           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3908           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3909           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3910         }
3911       }
3912     }
3913   }
3914 
3915   return SDValue();
3916 }
3917 
3918 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3919                                    EVT LoadResultTy, EVT &ExtVT) {
3920   if (!AndC->getAPIntValue().isMask())
3921     return false;
3922 
3923   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3924 
3925   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3926   EVT LoadedVT = LoadN->getMemoryVT();
3927 
3928   if (ExtVT == LoadedVT &&
3929       (!LegalOperations ||
3930        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3931     // ZEXTLOAD will match without needing to change the size of the value being
3932     // loaded.
3933     return true;
3934   }
3935 
3936   // Do not change the width of a volatile load.
3937   if (LoadN->isVolatile())
3938     return false;
3939 
3940   // Do not generate loads of non-round integer types since these can
3941   // be expensive (and would be wrong if the type is not byte sized).
3942   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3943     return false;
3944 
3945   if (LegalOperations &&
3946       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3947     return false;
3948 
3949   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3950     return false;
3951 
3952   return true;
3953 }
3954 
3955 bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
3956                                     EVT &ExtVT, unsigned ShAmt) {
3957   // Don't transform one with multiple uses, this would require adding a new
3958   // load.
3959   if (!SDValue(LoadN, 0).hasOneUse())
3960     return false;
3961 
3962   if (LegalOperations &&
3963       !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
3964     return false;
3965 
3966   // Do not generate loads of non-round integer types since these can
3967   // be expensive (and would be wrong if the type is not byte sized).
3968   if (!ExtVT.isRound())
3969     return false;
3970 
3971   // Don't change the width of a volatile load.
3972   if (LoadN->isVolatile())
3973     return false;
3974 
3975   // Verify that we are actually reducing a load width here.
3976   if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
3977     return false;
3978 
3979   // For the transform to be legal, the load must produce only two values
3980   // (the value loaded and the chain).  Don't transform a pre-increment
3981   // load, for example, which produces an extra value.  Otherwise the
3982   // transformation is not equivalent, and the downstream logic to replace
3983   // uses gets things wrong.
3984   if (LoadN->getNumValues() > 2)
3985     return false;
3986 
3987  // Only allow byte offsets.
3988   if (ShAmt % 8)
3989     return false;
3990 
3991   // Ensure that this isn't going to produce an unsupported unaligned access.
3992   if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
3993                                        ExtVT, LoadN->getAddressSpace(),
3994                                        ShAmt / 8))
3995     return false;
3996 
3997 
3998   // If the load that we're shrinking is an extload and we're not just
3999   // discarding the extension we can't simply shrink the load. Bail.
4000   // TODO: It would be possible to merge the extensions in some cases.
4001   if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
4002       LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
4003     return false;
4004 
4005   if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
4006     return false;
4007 
4008   // It's not possible to generate a constant of extended or untyped type.
4009   EVT PtrType = LoadN->getOperand(1).getValueType();
4010   if (PtrType == MVT::Untyped || PtrType.isExtended())
4011     return false;
4012 
4013   return true;
4014 }
4015 
/// Recursively walk the operands of \p N (a tree of AND/OR/XOR nodes rooted
/// at an AND with constant mask \p Mask), collecting loads that can be
/// narrowed under the mask into \p Loads, and OR/XOR nodes whose constant
/// operand has bits outside the mask into \p NodesWithConsts (those
/// constants get re-masked later). At most one other leaf is tolerated and
/// returned via \p NodeToMask; the caller must AND it with the mask
/// explicitly. Returns false if any operand makes the transform invalid.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    // Only scalar integer operands are handled.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      // For OR/XOR, bits of the constant outside the mask would survive into
      // the result, so remember the node for constant narrowing.
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Rewriting a multi-use operand would affect its other users.
    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.insert(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the known-zero boundary is the asserted VT, otherwise
      // it is the type being extended from.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall through: this node needs explicit masking.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse into the logic-op subtree.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue and chain results don't count as data results.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
4106 
/// If \p N is an AND with a constant low-bits mask, try to push the mask
/// back through its tree of AND/OR/XOR operands so that the loads feeding
/// the tree can be narrowed (via ReduceLoadWidth) and this AND becomes
/// redundant. Returns true if the DAG was changed.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  // Only a low-bits mask (0...01...1) can be absorbed by zext loads.
  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallPtrSet<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    // Without at least one narrowable load there is nothing to gain.
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW also redirected the new AND's own operand; restore it so
      // the AND still reads the original node rather than itself.
      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
                             MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Put the (possible) constant operand on the RHS.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      // Mask the constant operand so no bits outside the mask survive.
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      // Insert an AND above the load, then let ReduceLoadWidth fold the
      // masked load into a narrower (zero-extending) load.
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above, restore the AND's operand after the RAUW redirected it.
      DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The mask is now applied at the leaves, so the original AND is
    // redundant; replace it with its first operand.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
4172 
/// Main combine entry point for ISD::AND nodes: constant folding,
/// canonicalization, setcc/logic folds, and conversion of masked loads and
/// extensions into zero-extending forms.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // If the bits the mask would clear are already known zero in the
    // pre-extension value, the any_extend can become a zero_extend.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        // NOTE: this VT intentionally shadows the outer VT; it is the
        // build_vector's own type.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // Decide whether dropping the AND is sound for this load kind:
    // ZEXTLOAD/NON_EXTLOAD already leave the high bits zero; an EXTLOAD is
    // only OK if it can be profitably converted to ZEXTLOAD below.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);

      AddToWorklist(N);
      CombineTo(LN0, Res, Res.getValue(1));
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  return SDValue();
}
4472 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// Recognizes the low-halfword byteswap idiom on i16/i32/i64, looking
/// through optional AND masks on either side of the OR, and rewrites it as
/// (bswap a) shifted right so the swapped halfword lands in the low 16 bits.
/// Returns a null SDValue when the pattern does not match or BSWAP is not
/// legal/custom for the type.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize: put any AND-of-SRL into N1 and any AND-of-SHL into N0 so
  // the checks below only have to handle one orientation.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After stripping the masks, canonicalize the SHL into N0 and SRL into N1.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be the constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must read the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  // For types wider than 16 bits, shift the swapped halfword down to bit 0.
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4585 
4586 /// Return true if the specified node is an element that makes up a 32-bit
4587 /// packed halfword byteswap.
4588 /// ((x & 0x000000ff) << 8) |
4589 /// ((x & 0x0000ff00) >> 8) |
4590 /// ((x & 0x00ff0000) << 8) |
4591 /// ((x & 0xff000000) >> 8)
4592 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4593   if (!N.getNode()->hasOneUse())
4594     return false;
4595 
4596   unsigned Opc = N.getOpcode();
4597   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4598     return false;
4599 
4600   SDValue N0 = N.getOperand(0);
4601   unsigned Opc0 = N0.getOpcode();
4602   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4603     return false;
4604 
4605   ConstantSDNode *N1C = nullptr;
4606   // SHL or SRL: look upstream for AND mask operand
4607   if (Opc == ISD::AND)
4608     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4609   else if (Opc0 == ISD::AND)
4610     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4611   if (!N1C)
4612     return false;
4613 
4614   unsigned MaskByteOffset;
4615   switch (N1C->getZExtValue()) {
4616   default:
4617     return false;
4618   case 0xFF:       MaskByteOffset = 0; break;
4619   case 0xFF00:     MaskByteOffset = 1; break;
4620   case 0xFFFF:
4621     // In case demanded bits didn't clear the bits that will be shifted out.
4622     // This is needed for X86.
4623     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4624       MaskByteOffset = 1;
4625       break;
4626     }
4627     return false;
4628   case 0xFF0000:   MaskByteOffset = 2; break;
4629   case 0xFF000000: MaskByteOffset = 3; break;
4630   }
4631 
4632   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4633   if (Opc == ISD::AND) {
4634     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4635       // (x >> 8) & 0xff
4636       // (x >> 8) & 0xff0000
4637       if (Opc0 != ISD::SRL)
4638         return false;
4639       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4640       if (!C || C->getZExtValue() != 8)
4641         return false;
4642     } else {
4643       // (x << 8) & 0xff00
4644       // (x << 8) & 0xff000000
4645       if (Opc0 != ISD::SHL)
4646         return false;
4647       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4648       if (!C || C->getZExtValue() != 8)
4649         return false;
4650     }
4651   } else if (Opc == ISD::SHL) {
4652     // (x & 0xff) << 8
4653     // (x & 0xff0000) << 8
4654     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4655       return false;
4656     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4657     if (!C || C->getZExtValue() != 8)
4658       return false;
4659   } else { // Opc == ISD::SRL
4660     // (x & 0xff00) >> 8
4661     // (x & 0xff000000) >> 8
4662     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4663       return false;
4664     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4665     if (!C || C->getZExtValue() != 8)
4666       return false;
4667   }
4668 
4669   if (Parts[MaskByteOffset])
4670     return false;
4671 
4672   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4673   return true;
4674 }
4675 
4676 /// Match a 32-bit packed halfword bswap. That is
4677 /// ((x & 0x000000ff) << 8) |
4678 /// ((x & 0x0000ff00) >> 8) |
4679 /// ((x & 0x00ff0000) << 8) |
4680 /// ((x & 0xff000000) >> 8)
4681 /// => (rotl (bswap x), 16)
4682 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4683   if (!LegalOperations)
4684     return SDValue();
4685 
4686   EVT VT = N->getValueType(0);
4687   if (VT != MVT::i32)
4688     return SDValue();
4689   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4690     return SDValue();
4691 
4692   // Look for either
4693   // (or (or (and), (and)), (or (and), (and)))
4694   // (or (or (or (and), (and)), (and)), (and))
4695   if (N0.getOpcode() != ISD::OR)
4696     return SDValue();
4697   SDValue N00 = N0.getOperand(0);
4698   SDValue N01 = N0.getOperand(1);
4699   SDNode *Parts[4] = {};
4700 
4701   if (N1.getOpcode() == ISD::OR &&
4702       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4703     // (or (or (and), (and)), (or (and), (and)))
4704     if (!isBSwapHWordElement(N00, Parts))
4705       return SDValue();
4706 
4707     if (!isBSwapHWordElement(N01, Parts))
4708       return SDValue();
4709     SDValue N10 = N1.getOperand(0);
4710     if (!isBSwapHWordElement(N10, Parts))
4711       return SDValue();
4712     SDValue N11 = N1.getOperand(1);
4713     if (!isBSwapHWordElement(N11, Parts))
4714       return SDValue();
4715   } else {
4716     // (or (or (or (and), (and)), (and)), (and))
4717     if (!isBSwapHWordElement(N1, Parts))
4718       return SDValue();
4719     if (!isBSwapHWordElement(N01, Parts))
4720       return SDValue();
4721     if (N00.getOpcode() != ISD::OR)
4722       return SDValue();
4723     SDValue N000 = N00.getOperand(0);
4724     if (!isBSwapHWordElement(N000, Parts))
4725       return SDValue();
4726     SDValue N001 = N00.getOperand(1);
4727     if (!isBSwapHWordElement(N001, Parts))
4728       return SDValue();
4729   }
4730 
4731   // Make sure the parts are all coming from the same node.
4732   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4733     return SDValue();
4734 
4735   SDLoc DL(N);
4736   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4737                               SDValue(Parts[0], 0));
4738 
4739   // Result of the bswap should be rotated by 16. If it's not legal, then
4740   // do  (x << 16) | (x >> 16).
4741   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4742   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4743     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4744   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4745     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4746   return DAG.getNode(ISD::OR, DL, VT,
4747                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4748                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4749 }
4750 
4751 /// This contains all DAGCombine rules which reduce two values combined by
4752 /// an Or operation to a single value \see visitANDLike().
4753 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4754   EVT VT = N1.getValueType();
4755   SDLoc DL(N);
4756 
4757   // fold (or x, undef) -> -1
4758   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4759     return DAG.getAllOnesConstant(DL, VT);
4760 
4761   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4762     return V;
4763 
4764   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4765   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4766       // Don't increase # computations.
4767       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4768     // We can only do this xform if we know that bits from X that are set in C2
4769     // but not in C1 are already zero.  Likewise for Y.
4770     if (const ConstantSDNode *N0O1C =
4771         getAsNonOpaqueConstant(N0.getOperand(1))) {
4772       if (const ConstantSDNode *N1O1C =
4773           getAsNonOpaqueConstant(N1.getOperand(1))) {
4774         // We can only do this xform if we know that bits from X that are set in
4775         // C2 but not in C1 are already zero.  Likewise for Y.
4776         const APInt &LHSMask = N0O1C->getAPIntValue();
4777         const APInt &RHSMask = N1O1C->getAPIntValue();
4778 
4779         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4780             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4781           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4782                                   N0.getOperand(0), N1.getOperand(0));
4783           return DAG.getNode(ISD::AND, DL, VT, X,
4784                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4785         }
4786       }
4787     }
4788   }
4789 
4790   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4791   if (N0.getOpcode() == ISD::AND &&
4792       N1.getOpcode() == ISD::AND &&
4793       N0.getOperand(0) == N1.getOperand(0) &&
4794       // Don't increase # computations.
4795       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4796     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4797                             N0.getOperand(1), N1.getOperand(1));
4798     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4799   }
4800 
4801   return SDValue();
4802 }
4803 
/// Visit an ISD::OR node: constant folding, algebraic identities, vector
/// shuffle merging, and bswap/rotate/load-combine pattern recognition.
/// Returns the replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build the combined mask element by element; each lane must take
        // its value from exactly one of the two shuffles (the other lane
        // being zero), otherwise the fold is abandoned.
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          // If the target rejects the mask as-is, try the commuted form.
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Try to merge loads of adjacent bytes OR'ed together into a wider load.
  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
4969 
4970 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4971 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4972   if (Op.getOpcode() == ISD::AND) {
4973     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
4974       Mask = Op.getOperand(1);
4975       Op = Op.getOperand(0);
4976     } else {
4977       return false;
4978     }
4979   }
4980 
4981   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4982     Shift = Op;
4983     return true;
4984   }
4985 
4986   return false;
4987 }
4988 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Neg.getOperand(0), Known);
      unsigned Bits = Log2_64(EltSize);
      // The AND is redundant as a truncation if the mask covers at least the
      // low log2(EltSize) bits of what is known to be there; strip it.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Pos.getOperand(0), Known);
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  // Width accumulates the value that must equal EltSize modulo Mask.
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
5102 
5103 // A subroutine of MatchRotate used once we have found an OR of two opposite
5104 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5105 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5106 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5107 // Neg with outer conversions stripped away.
5108 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5109                                        SDValue Neg, SDValue InnerPos,
5110                                        SDValue InnerNeg, unsigned PosOpcode,
5111                                        unsigned NegOpcode, const SDLoc &DL) {
5112   // fold (or (shl x, (*ext y)),
5113   //          (srl x, (*ext (sub 32, y)))) ->
5114   //   (rotl x, y) or (rotr x, (sub 32, y))
5115   //
5116   // fold (or (shl x, (*ext (sub 32, y))),
5117   //          (srl x, (*ext y))) ->
5118   //   (rotr x, y) or (rotl x, (sub 32, y))
5119   EVT VT = Shifted.getValueType();
5120   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5121     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5122     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5123                        HasPos ? Pos : Neg).getNode();
5124   }
5125 
5126   return nullptr;
5127 }
5128 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the rotate node, or null when no rotate pattern matches.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate: recurse on the wider operands and truncate
  // the rotated result.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        // Bits filled by the SRL half were never subject to LHSMask.
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        // Bits filled by the SHL half were never subject to RHSMask.
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both orientations: amounts proving a ROTL, then a ROTR.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
5246 
5247 namespace {
5248 
5249 /// Represents known origin of an individual byte in load combine pattern. The
5250 /// value of the byte is either constant zero or comes from memory.
5251 struct ByteProvider {
5252   // For constant zero providers Load is set to nullptr. For memory providers
5253   // Load represents the node which loads the byte from memory.
5254   // ByteOffset is the offset of the byte in the value produced by the load.
5255   LoadSDNode *Load = nullptr;
5256   unsigned ByteOffset = 0;
5257 
5258   ByteProvider() = default;
5259 
5260   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5261     return ByteProvider(Load, ByteOffset);
5262   }
5263 
5264   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5265 
5266   bool isConstantZero() const { return !Load; }
5267   bool isMemory() const { return Load; }
5268 
5269   bool operator==(const ByteProvider &Other) const {
5270     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5271   }
5272 
5273 private:
5274   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5275       : Load(Load), ByteOffset(ByteOffset) {}
5276 };
5277 
5278 } // end anonymous namespace
5279 
5280 /// Recursively traverses the expression calculating the origin of the requested
5281 /// byte of the given value. Returns None if the provider can't be calculated.
5282 ///
5283 /// For all the values except the root of the expression verifies that the value
5284 /// has exactly one use and if it's not true return None. This way if the origin
5285 /// of the byte is returned it's guaranteed that the values which contribute to
5286 /// the byte are not used outside of this expression.
5287 ///
5288 /// Because the parts of the expression are not allowed to have more than one
5289 /// use this function iterates over trees, not DAGs. So it never visits the same
5290 /// node more than once.
5291 static const Optional<ByteProvider>
5292 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5293                       bool Root = false) {
5294   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5295   if (Depth == 10)
5296     return None;
5297 
5298   if (!Root && !Op.hasOneUse())
5299     return None;
5300 
5301   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5302   unsigned BitWidth = Op.getValueSizeInBits();
5303   if (BitWidth % 8 != 0)
5304     return None;
5305   unsigned ByteWidth = BitWidth / 8;
5306   assert(Index < ByteWidth && "invalid index requested");
5307   (void) ByteWidth;
5308 
5309   switch (Op.getOpcode()) {
5310   case ISD::OR: {
5311     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5312     if (!LHS)
5313       return None;
5314     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5315     if (!RHS)
5316       return None;
5317 
5318     if (LHS->isConstantZero())
5319       return RHS;
5320     if (RHS->isConstantZero())
5321       return LHS;
5322     return None;
5323   }
5324   case ISD::SHL: {
5325     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5326     if (!ShiftOp)
5327       return None;
5328 
5329     uint64_t BitShift = ShiftOp->getZExtValue();
5330     if (BitShift % 8 != 0)
5331       return None;
5332     uint64_t ByteShift = BitShift / 8;
5333 
5334     return Index < ByteShift
5335                ? ByteProvider::getConstantZero()
5336                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5337                                        Depth + 1);
5338   }
5339   case ISD::ANY_EXTEND:
5340   case ISD::SIGN_EXTEND:
5341   case ISD::ZERO_EXTEND: {
5342     SDValue NarrowOp = Op->getOperand(0);
5343     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5344     if (NarrowBitWidth % 8 != 0)
5345       return None;
5346     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5347 
5348     if (Index >= NarrowByteWidth)
5349       return Op.getOpcode() == ISD::ZERO_EXTEND
5350                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5351                  : None;
5352     return calculateByteProvider(NarrowOp, Index, Depth + 1);
5353   }
5354   case ISD::BSWAP:
5355     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5356                                  Depth + 1);
5357   case ISD::LOAD: {
5358     auto L = cast<LoadSDNode>(Op.getNode());
5359     if (L->isVolatile() || L->isIndexed())
5360       return None;
5361 
5362     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5363     if (NarrowBitWidth % 8 != 0)
5364       return None;
5365     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5366 
5367     if (Index >= NarrowByteWidth)
5368       return L->getExtensionType() == ISD::ZEXTLOAD
5369                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5370                  : None;
5371     return ByteProvider::getMemory(L, Index);
5372   }
5373   }
5374 
5375   return None;
5376 }
5377 
5378 /// Match a pattern where a wide type scalar value is loaded by several narrow
5379 /// loads and combined by shifts and ors. Fold it into a single load or a load
5380 /// and a BSWAP if the targets supports it.
5381 ///
5382 /// Assuming little endian target:
5383 ///  i8 *a = ...
5384 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5385 /// =>
5386 ///  i32 val = *((i32)a)
5387 ///
5388 ///  i8 *a = ...
5389 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5390 /// =>
5391 ///  i32 val = BSWAP(*((i32)a))
5392 ///
5393 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5394 /// interact well with the worklist mechanism. When a part of the pattern is
5395 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5396 /// but the root node of the pattern which triggers the load combine is not
5397 /// necessarily a direct user of the changed node. For example, once the address
5398 /// of t28 load is reassociated load combine won't be triggered:
5399 ///             t25: i32 = add t4, Constant:i32<2>
5400 ///           t26: i64 = sign_extend t25
5401 ///        t27: i64 = add t2, t26
5402 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5403 ///     t29: i32 = zero_extend t28
5404 ///   t32: i32 = shl t29, Constant:i8<8>
5405 /// t33: i32 = or t23, t32
5406 /// As a possible fix visitLoad can check if the load can be a part of a load
5407 /// combine pattern and add corresponding OR roots to the worklist.
5408 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5409   assert(N->getOpcode() == ISD::OR &&
5410          "Can only match load combining against OR nodes");
5411 
5412   // Handles simple types only
5413   EVT VT = N->getValueType(0);
5414   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5415     return SDValue();
5416   unsigned ByteWidth = VT.getSizeInBits() / 8;
5417 
5418   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5419   // Before legalize we can introduce too wide illegal loads which will be later
5420   // split into legal sized loads. This enables us to combine i64 load by i8
5421   // patterns to a couple of i32 loads on 32 bit targets.
5422   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5423     return SDValue();
5424 
5425   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5426     unsigned BW, unsigned i) { return i; };
5427   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5428     unsigned BW, unsigned i) { return BW - i - 1; };
5429 
5430   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5431   auto MemoryByteOffset = [&] (ByteProvider P) {
5432     assert(P.isMemory() && "Must be a memory byte provider");
5433     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5434     assert(LoadBitWidth % 8 == 0 &&
5435            "can only analyze providers for individual bytes not bit");
5436     unsigned LoadByteWidth = LoadBitWidth / 8;
5437     return IsBigEndianTarget
5438             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5439             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5440   };
5441 
5442   Optional<BaseIndexOffset> Base;
5443   SDValue Chain;
5444 
5445   SmallPtrSet<LoadSDNode *, 8> Loads;
5446   Optional<ByteProvider> FirstByteProvider;
5447   int64_t FirstOffset = INT64_MAX;
5448 
5449   // Check if all the bytes of the OR we are looking at are loaded from the same
5450   // base address. Collect bytes offsets from Base address in ByteOffsets.
5451   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5452   for (unsigned i = 0; i < ByteWidth; i++) {
5453     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5454     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5455       return SDValue();
5456 
5457     LoadSDNode *L = P->Load;
5458     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5459            "Must be enforced by calculateByteProvider");
5460     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5461 
5462     // All loads must share the same chain
5463     SDValue LChain = L->getChain();
5464     if (!Chain)
5465       Chain = LChain;
5466     else if (Chain != LChain)
5467       return SDValue();
5468 
5469     // Loads must share the same base address
5470     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5471     int64_t ByteOffsetFromBase = 0;
5472     if (!Base)
5473       Base = Ptr;
5474     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5475       return SDValue();
5476 
5477     // Calculate the offset of the current byte from the base address
5478     ByteOffsetFromBase += MemoryByteOffset(*P);
5479     ByteOffsets[i] = ByteOffsetFromBase;
5480 
5481     // Remember the first byte load
5482     if (ByteOffsetFromBase < FirstOffset) {
5483       FirstByteProvider = P;
5484       FirstOffset = ByteOffsetFromBase;
5485     }
5486 
5487     Loads.insert(L);
5488   }
5489   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5490          "memory, so there must be at least one load which produces the value");
5491   assert(Base && "Base address of the accessed memory location must be set");
5492   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5493 
5494   // Check if the bytes of the OR we are looking at match with either big or
5495   // little endian value load
5496   bool BigEndian = true, LittleEndian = true;
5497   for (unsigned i = 0; i < ByteWidth; i++) {
5498     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5499     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5500     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5501     if (!BigEndian && !LittleEndian)
5502       return SDValue();
5503   }
5504   assert((BigEndian != LittleEndian) && "should be either or");
5505   assert(FirstByteProvider && "must be set");
5506 
5507   // Ensure that the first byte is loaded from zero offset of the first load.
5508   // So the combined value can be loaded from the first load address.
5509   if (MemoryByteOffset(*FirstByteProvider) != 0)
5510     return SDValue();
5511   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5512 
5513   // The node we are looking at matches with the pattern, check if we can
5514   // replace it with a single load and bswap if needed.
5515 
5516   // If the load needs byte swap check if the target supports it
5517   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5518 
5519   // Before legalize we can introduce illegal bswaps which will be later
5520   // converted to an explicit bswap sequence. This way we end up with a single
5521   // load and byte shuffling instead of several loads and byte shuffling.
5522   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5523     return SDValue();
5524 
5525   // Check that a load of the wide type is both allowed and fast on the target
5526   bool Fast = false;
5527   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5528                                         VT, FirstLoad->getAddressSpace(),
5529                                         FirstLoad->getAlignment(), &Fast);
5530   if (!Allowed || !Fast)
5531     return SDValue();
5532 
5533   SDValue NewLoad =
5534       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5535                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5536 
5537   // Transfer chain users from old loads to the new load.
5538   for (LoadSDNode *L : Loads)
5539     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5540 
5541   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5542 }
5543 
5544 // If the target has andn, bsl, or a similar bit-select instruction,
5545 // we want to unfold masked merge, with canonical pattern of:
5546 //   |        A  |  |B|
5547 //   ((x ^ y) & m) ^ y
5548 //    |  D  |
5549 // Into:
5550 //   (x & m) | (y & ~m)
5551 // If y is a constant, and the 'andn' does not work with immediates,
5552 // we unfold into a different pattern:
5553 //   ~(~x & m) & (m | y)
5554 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
5555 //       the very least that breaks andnpd / andnps patterns, and because those
5556 //       patterns are simplified in IR and shouldn't be created in the DAG
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  // Checks whether 'And' is the D = ((x ^ y) & m) part of the pattern, with
  // the inner xor at operand XorIdx of the AND and 'Other' being the y operand
  // of the outer xor. On success captures X, Y and M by reference.
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
      return false;
    // The inner xor is commutable: make sure Xor1 is the operand that matches
    // the outer xor's other operand (y).
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  // Try the AND on either side of the outer xor, with the inner xor at either
  // operand of the AND (4 calls x 2 commutations inside = 8 variants).
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it, too.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    // Emit the alternate form  ~(~x & m) & (m | y)  so the andn takes the
    // variable operand x.
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  // Canonical unfolded form:  (x & m) | (y & ~m).
  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
5625 
/// Try all XOR-rooted combines on N. Returns the replacement value,
/// SDValue(N, 0) when N was simplified in place, or an empty SDValue when no
/// fold applied. The order of the folds below is significant.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only invert the condition if the resulting code is legal (anything goes
    // before legalization).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: flip AND<->OR and push the 'not' onto the operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    // The ADD and the SRA may appear as either operand of the XOR.
    SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
    SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        // The shift must broadcast the sign bit (shift amount == bits - 1).
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
5797 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// \param N   the SHL/SRA/SRL node being combined.
/// \param Amt the constant shift amount (unused directly; callers guarantee
///            N's shift amount is this constant).
/// \returns the commuted (binop (shift x), (shift c)) form, or an empty
///          SDValue if the transform does not apply.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is a copy/select. Enable this in other cases when we figure out it's
  // exactly profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  // NOTE(review): bailing out when N has exactly ONE use looks inverted --
  // one would expect the transform to be safest precisely in the single-use
  // case. Confirm whether '!N->hasOneUse()' was intended here.
  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  // Since both operands are constants this always constant-folds.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
5875 
5876 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
5877   assert(N->getOpcode() == ISD::TRUNCATE);
5878   assert(N->getOperand(0).getOpcode() == ISD::AND);
5879 
5880   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
5881   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
5882     SDValue N01 = N->getOperand(0).getOperand(1);
5883     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
5884       SDLoc DL(N);
5885       EVT TruncVT = N->getValueType(0);
5886       SDValue N00 = N->getOperand(0).getOperand(0);
5887       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
5888       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
5889       AddToWorklist(Trunc00.getNode());
5890       AddToWorklist(Trunc01.getNode());
5891       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
5892     }
5893   }
5894 
5895   return SDValue();
5896 }
5897 
/// Combine patterns rooted at a ROTL/ROTR node. Returns the replacement value
/// or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullConstantOrNullSplatConstant(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    // Only rewrite when the amount is actually out of range, to avoid an
    // infinite combine loop.
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same-direction rotates add their amounts; opposite directions
      // subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        // NOTE(review): SREM can yield a negative normalized amount when
        // c1 - c2 < 0 in the SUB case; confirm that a negative rotate amount
        // is treated as the equivalent positive rotation, or whether UREM
        // was intended here.
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}
5947 
/// Combine an ISD::SHL node.
///
/// Tries a sequence of independent folds -- constant folding, degenerate
/// operand handling, merging with inner shifts and extensions, and
/// distributing the shift over add/or/mul -- and returns the replacement
/// value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    // This is only safe when a "true" setcc result is all-ones
    // (ZeroOrNegativeOneBooleanContent), so shifting the mask constant alone
    // preserves the AND's effect on the setcc lanes.
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  // Scalar constant, or splatted constant vector, shift amount (if any).
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // Simplify based on which result bits are actually demanded by users.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen both amounts (with an extra overflow bit) so the sum can't wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // c2 must cover at least the bits added by the extension (see above).
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        // Only matching shift amounts are handled; the shl/srl pair then
        // cancels into a mask in the narrow type.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // The 'exact' flag guarantees no bits were shifted out by the inner shift,
  // so the two shifts compose into a single one.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask models the bits the srl would have cleared, re-aligned to the
        // position of the single residual shift.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only commit if the shifted constant actually folded to a constant.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  // Generic shift-by-constant combines shared with SRA/SRL.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
6185 
/// Combine an ISD::SRA node.
///
/// Applies arithmetic-shift-specific folds (sign-bit no-ops, sext_inreg
/// recognition, merging of stacked sra's, sra-of-truncate narrowing) and
/// returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Scalar constant, or splatted constant vector, shift amount (if any).
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >> c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // The shl/sra pair sign-extends the low (size - c1) bits of x.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();

    // If the summed amount reaches the bit width, the result is just the
    // replicated sign bit: clamp the shift to OpSizeInBits - 1.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen both amounts (with an extra overflow bit) so the sum can't wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                         DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      // Valid only when the inner shift discards exactly the bits the
      // truncate would have removed.
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Generic shift-by-constant combines shared with SHL/SRL.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
6352 
/// Combine an ISD::SRL node.
///
/// Applies logical-shift-right folds (constant folding, merging stacked
/// srl's, srl-of-shl masking, anyext narrowing, ctlz tricks, load-width
/// reduction) and returns the replacement value, or an empty SDValue if no
/// fold applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Scalar constant, or splatted constant vector, shift amount (if any).
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen both amounts (with an extra overflow bit) so the sum can't wrap.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    // Mask of the bits that survive the shl/srl round trip.
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // The AND clears the (previously undef) high bits that the srl was
      // defined to zero out.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  // ("5" stands for log2 of the bit width, i.e. the result is 1 iff x == 0.)
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known;
    DAG.computeKnownBits(N0.getOperand(0), Known);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Generic shift-by-constant combines shared with SHL/SRA.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
6572 
6573 SDValue DAGCombiner::visitABS(SDNode *N) {
6574   SDValue N0 = N->getOperand(0);
6575   EVT VT = N->getValueType(0);
6576 
6577   // fold (abs c1) -> c2
6578   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6579     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6580   // fold (abs (abs x)) -> (abs x)
6581   if (N0.getOpcode() == ISD::ABS)
6582     return N0;
6583   // fold (abs x) -> x iff not-negative
6584   if (DAG.SignBitIsZero(N0))
6585     return N0;
6586   return SDValue();
6587 }
6588 
6589 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6590   SDValue N0 = N->getOperand(0);
6591   EVT VT = N->getValueType(0);
6592 
6593   // fold (bswap c1) -> c2
6594   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6595     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6596   // fold (bswap (bswap x)) -> x
6597   if (N0.getOpcode() == ISD::BSWAP)
6598     return N0->getOperand(0);
6599   return SDValue();
6600 }
6601 
6602 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6603   SDValue N0 = N->getOperand(0);
6604   EVT VT = N->getValueType(0);
6605 
6606   // fold (bitreverse c1) -> c2
6607   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6608     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6609   // fold (bitreverse (bitreverse x)) -> x
6610   if (N0.getOpcode() == ISD::BITREVERSE)
6611     return N0.getOperand(0);
6612   return SDValue();
6613 }
6614 
6615 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6616   SDValue N0 = N->getOperand(0);
6617   EVT VT = N->getValueType(0);
6618 
6619   // fold (ctlz c1) -> c2
6620   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6621     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6622 
6623   // If the value is known never to be zero, switch to the undef version.
6624   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
6625     if (DAG.isKnownNeverZero(N0))
6626       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6627   }
6628 
6629   return SDValue();
6630 }
6631 
6632 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6633   SDValue N0 = N->getOperand(0);
6634   EVT VT = N->getValueType(0);
6635 
6636   // fold (ctlz_zero_undef c1) -> c2
6637   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6638     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6639   return SDValue();
6640 }
6641 
6642 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6643   SDValue N0 = N->getOperand(0);
6644   EVT VT = N->getValueType(0);
6645 
6646   // fold (cttz c1) -> c2
6647   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6648     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6649 
6650   // If the value is known never to be zero, switch to the undef version.
6651   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
6652     if (DAG.isKnownNeverZero(N0))
6653       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6654   }
6655 
6656   return SDValue();
6657 }
6658 
6659 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6660   SDValue N0 = N->getOperand(0);
6661   EVT VT = N->getValueType(0);
6662 
6663   // fold (cttz_zero_undef c1) -> c2
6664   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6665     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6666   return SDValue();
6667 }
6668 
6669 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
6670   SDValue N0 = N->getOperand(0);
6671   EVT VT = N->getValueType(0);
6672 
6673   // fold (ctpop c1) -> c2
6674   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6675     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
6676   return SDValue();
6677 }
6678 
6679 /// Generate Min/Max node
6680 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
6681                                    SDValue RHS, SDValue True, SDValue False,
6682                                    ISD::CondCode CC, const TargetLowering &TLI,
6683                                    SelectionDAG &DAG) {
6684   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
6685     return SDValue();
6686 
6687   switch (CC) {
6688   case ISD::SETOLT:
6689   case ISD::SETOLE:
6690   case ISD::SETLT:
6691   case ISD::SETLE:
6692   case ISD::SETULT:
6693   case ISD::SETULE: {
6694     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6695     if (TLI.isOperationLegal(Opcode, VT))
6696       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6697     return SDValue();
6698   }
6699   case ISD::SETOGT:
6700   case ISD::SETOGE:
6701   case ISD::SETGT:
6702   case ISD::SETGE:
6703   case ISD::SETUGT:
6704   case ISD::SETUGE: {
6705     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6706     if (TLI.isOperationLegal(Opcode, VT))
6707       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
6708     return SDValue();
6709   }
6710   default:
6711     return SDValue();
6712   }
6713 }
6714 
/// Try to fold a select whose two arms are integer constants into plain
/// integer math (not/zext/sext/add/xor) so that no select is needed.
/// Returns an empty SDValue if no fold applies.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);   // value selected when Cond is true
  SDValue N2 = N->getOperand(2);   // value selected when Cond is false
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  // Only integer-typed selects are handled here.
  if (!VT.isInteger())
    return SDValue();

  // Both arms must be integer constants.
  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    // Non-i1 conditions fall through to the boolean-contents-based fold below.
    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    // Flip the low bit of the condition, then adjust its width to VT.
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
6808 
/// Combine a scalar SELECT node: fold constant/identical arms, turn i1
/// selects into and/or logic, normalize chained selects of i1 conditions,
/// and form FMINNUM/FMAXNUM or SELECT_CC from setcc-based conditions.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition
  SDValue N1 = N->getOperand(1); // value if condition is true
  SDValue N2 = N->getOperand(2); // value if condition is false
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // use_empty() means getNode() found a pre-existing node, so the inner
      // select is free; otherwise only transform if the target wants it.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  if (VT0 == MVT::i1) {
    // select (not Cond), N1, N2 -> select Cond, N2, N1
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // Prefer a single SELECT_CC node when the target supports it.
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
6962 
6963 static
6964 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
6965   SDLoc DL(N);
6966   EVT LoVT, HiVT;
6967   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
6968 
6969   // Split the inputs.
6970   SDValue Lo, Hi, LL, LH, RL, RH;
6971   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
6972   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
6973 
6974   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
6975   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
6976 
6977   return std::make_pair(Lo, Hi);
6978 }
6979 
6980 // This function assumes all the vselect's arguments are CONCAT_VECTOR
6981 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
6982 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
6983   SDLoc DL(N);
6984   SDValue Cond = N->getOperand(0);
6985   SDValue LHS = N->getOperand(1);
6986   SDValue RHS = N->getOperand(2);
6987   EVT VT = N->getValueType(0);
6988   int NumElems = VT.getVectorNumElements();
6989   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
6990          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
6991          Cond.getOpcode() == ISD::BUILD_VECTOR);
6992 
6993   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
6994   // binary ones here.
6995   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
6996     return SDValue();
6997 
6998   // We're sure we have an even number of elements due to the
6999   // concat_vectors we have as arguments to vselect.
7000   // Skip BV elements until we find one that's not an UNDEF
7001   // After we find an UNDEF element, keep looping until we get to half the
7002   // length of the BV and see if all the non-undef nodes are the same.
7003   ConstantSDNode *BottomHalf = nullptr;
7004   for (int i = 0; i < NumElems / 2; ++i) {
7005     if (Cond->getOperand(i)->isUndef())
7006       continue;
7007 
7008     if (BottomHalf == nullptr)
7009       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7010     else if (Cond->getOperand(i).getNode() != BottomHalf)
7011       return SDValue();
7012   }
7013 
7014   // Do the same for the second half of the BuildVector
7015   ConstantSDNode *TopHalf = nullptr;
7016   for (int i = NumElems / 2; i < NumElems; ++i) {
7017     if (Cond->getOperand(i)->isUndef())
7018       continue;
7019 
7020     if (TopHalf == nullptr)
7021       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7022     else if (Cond->getOperand(i).getNode() != TopHalf)
7023       return SDValue();
7024   }
7025 
7026   assert(TopHalf && BottomHalf &&
7027          "One half of the selector was all UNDEFs and the other was all the "
7028          "same value. This should have been addressed before this function.");
7029   return DAG.getNode(
7030       ISD::CONCAT_VECTORS, DL, VT,
7031       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7032       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7033 }
7034 
/// Split a masked scatter whose data type will be split by type legalization
/// and whose mask comes from a SETCC, so the SETCC is split along with it
/// rather than being scalarized by the type legalizer.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data  = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  // NOTE(review): both half-scatters share this one MMO, which is sized for
  // the lo half and uses the original pointer info — confirm this is the
  // intended conservative description for the hi half as well.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                          MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                          Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                            DL, OpsLo, MMO);

  SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                            DL, OpsHi, MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Join the two chains so users of the original chain see both stores.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
7095 
7096 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7097   if (Level >= AfterLegalizeTypes)
7098     return SDValue();
7099 
7100   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7101   SDValue Mask = MST->getMask();
7102   SDValue Data  = MST->getValue();
7103   EVT VT = Data.getValueType();
7104   SDLoc DL(N);
7105 
7106   // If the MSTORE data type requires splitting and the mask is provided by a
7107   // SETCC, then split both nodes and its operands before legalization. This
7108   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7109   // and enables future optimizations (e.g. min/max pattern matching on X86).
7110   if (Mask.getOpcode() == ISD::SETCC) {
7111     // Check if any splitting is required.
7112     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7113         TargetLowering::TypeSplitVector)
7114       return SDValue();
7115 
7116     SDValue MaskLo, MaskHi, Lo, Hi;
7117     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7118 
7119     SDValue Chain = MST->getChain();
7120     SDValue Ptr   = MST->getBasePtr();
7121 
7122     EVT MemoryVT = MST->getMemoryVT();
7123     unsigned Alignment = MST->getOriginalAlignment();
7124 
7125     // if Alignment is equal to the vector size,
7126     // take the half of it for the second part
7127     unsigned SecondHalfAlignment =
7128       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7129 
7130     EVT LoMemVT, HiMemVT;
7131     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7132 
7133     SDValue DataLo, DataHi;
7134     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7135 
7136     MachineMemOperand *MMO = DAG.getMachineFunction().
7137       getMachineMemOperand(MST->getPointerInfo(),
7138                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7139                            Alignment, MST->getAAInfo(), MST->getRanges());
7140 
7141     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7142                             MST->isTruncatingStore(),
7143                             MST->isCompressingStore());
7144 
7145     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7146                                      MST->isCompressingStore());
7147     unsigned HiOffset = LoMemVT.getStoreSize();
7148 
7149     MMO = DAG.getMachineFunction().getMachineMemOperand(
7150         MST->getPointerInfo().getWithOffset(HiOffset),
7151         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7152         MST->getAAInfo(), MST->getRanges());
7153 
7154     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7155                             MST->isTruncatingStore(),
7156                             MST->isCompressingStore());
7157 
7158     AddToWorklist(Lo.getNode());
7159     AddToWorklist(Hi.getNode());
7160 
7161     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7162   }
7163   return SDValue();
7164 }
7165 
/// Split a masked gather whose result type will be split by type
/// legalization and whose mask comes from a SETCC, so the SETCC is split
/// along with it rather than being scalarized by the type legalizer.
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).

  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  EVT VT = N->getValueType(0);

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
      TargetLowering::TypeSplitVector)
    return SDValue();

  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  // Split the pass-through value as well.
  SDValue Src0 = MGT->getValue();
  SDValue Src0Lo, Src0Hi;
  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

  SDValue Chain = MGT->getChain();
  EVT MemoryVT = MGT->getMemoryVT();
  unsigned Alignment = MGT->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue Scale = MGT->getScale();
  SDValue BasePtr = MGT->getBasePtr();
  SDValue Index = MGT->getIndex();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);

  // NOTE(review): both half-gathers share this one MMO, which is sized for
  // the lo half and uses the original pointer info — confirm this is the
  // intended conservative description for the hi half as well.
  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MGT->getPointerInfo(),
                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                          Alignment, MGT->getAAInfo(), MGT->getRanges());

  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
                           MMO);

  SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
                           MMO);

  AddToWorklist(Lo.getNode());
  AddToWorklist(Hi.getNode());

  // Build a factor node to remember that this load is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                      Hi.getValue(1));

  // Legalized the chain result - switch anything that used the old chain to
  // use the new one.
  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);

  // Concatenate the half results back into the full-width gather result.
  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

  SDValue RetOps[] = { GatherRes, Chain };
  return DAG.getMergeValues(RetOps, DL);
}
7242 
7243 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7244   if (Level >= AfterLegalizeTypes)
7245     return SDValue();
7246 
7247   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7248   SDValue Mask = MLD->getMask();
7249   SDLoc DL(N);
7250 
7251   // If the MLOAD result requires splitting and the mask is provided by a
7252   // SETCC, then split both nodes and its operands before legalization. This
7253   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7254   // and enables future optimizations (e.g. min/max pattern matching on X86).
7255   if (Mask.getOpcode() == ISD::SETCC) {
7256     EVT VT = N->getValueType(0);
7257 
7258     // Check if any splitting is required.
7259     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7260         TargetLowering::TypeSplitVector)
7261       return SDValue();
7262 
7263     SDValue MaskLo, MaskHi, Lo, Hi;
7264     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7265 
7266     SDValue Src0 = MLD->getSrc0();
7267     SDValue Src0Lo, Src0Hi;
7268     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
7269 
7270     EVT LoVT, HiVT;
7271     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7272 
7273     SDValue Chain = MLD->getChain();
7274     SDValue Ptr   = MLD->getBasePtr();
7275     EVT MemoryVT = MLD->getMemoryVT();
7276     unsigned Alignment = MLD->getOriginalAlignment();
7277 
7278     // if Alignment is equal to the vector size,
7279     // take the half of it for the second part
7280     unsigned SecondHalfAlignment =
7281       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7282          Alignment/2 : Alignment;
7283 
7284     EVT LoMemVT, HiMemVT;
7285     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7286 
7287     MachineMemOperand *MMO = DAG.getMachineFunction().
7288     getMachineMemOperand(MLD->getPointerInfo(),
7289                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7290                          Alignment, MLD->getAAInfo(), MLD->getRanges());
7291 
7292     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
7293                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7294 
7295     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7296                                      MLD->isExpandingLoad());
7297     unsigned HiOffset = LoMemVT.getStoreSize();
7298 
7299     MMO = DAG.getMachineFunction().getMachineMemOperand(
7300         MLD->getPointerInfo().getWithOffset(HiOffset),
7301         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7302         MLD->getAAInfo(), MLD->getRanges());
7303 
7304     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
7305                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7306 
7307     AddToWorklist(Lo.getNode());
7308     AddToWorklist(Hi.getNode());
7309 
7310     // Build a factor node to remember that this load is independent of the
7311     // other one.
7312     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7313                         Hi.getValue(1));
7314 
7315     // Legalized the chain result - switch anything that used the old chain to
7316     // use the new one.
7317     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7318 
7319     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7320 
7321     SDValue RetOps[] = { LoadRes, Chain };
7322     return DAG.getMergeValues(RetOps, DL);
7323   }
7324   return SDValue();
7325 }
7326 
7327 /// A vector select of 2 constant vectors can be simplified to math/logic to
7328 /// avoid a variable select instruction and possibly avoid constant loads.
7329 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7330   SDValue Cond = N->getOperand(0);
7331   SDValue N1 = N->getOperand(1);
7332   SDValue N2 = N->getOperand(2);
7333   EVT VT = N->getValueType(0);
7334   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7335       !TLI.convertSelectOfConstantsToMath(VT) ||
7336       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7337       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7338     return SDValue();
7339 
7340   // Check if we can use the condition value to increment/decrement a single
7341   // constant value. This simplifies a select to an add and removes a constant
7342   // load/materialization from the general case.
7343   bool AllAddOne = true;
7344   bool AllSubOne = true;
7345   unsigned Elts = VT.getVectorNumElements();
7346   for (unsigned i = 0; i != Elts; ++i) {
7347     SDValue N1Elt = N1.getOperand(i);
7348     SDValue N2Elt = N2.getOperand(i);
7349     if (N1Elt.isUndef() || N2Elt.isUndef())
7350       continue;
7351 
7352     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7353     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7354     if (C1 != C2 + 1)
7355       AllAddOne = false;
7356     if (C1 != C2 - 1)
7357       AllSubOne = false;
7358   }
7359 
7360   // Further simplifications for the extra-special cases where the constants are
7361   // all 0 or all -1 should be implemented as folds of these patterns.
7362   SDLoc DL(N);
7363   if (AllAddOne || AllSubOne) {
7364     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7365     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7366     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7367     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7368     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7369   }
7370 
7371   // The general case for select-of-constants:
7372   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7373   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7374   // leave that to a machine-specific pass.
7375   return SDValue();
7376 }
7377 
/// Combine a vector select: fold identical/constant arms, recognize integer
/// abs patterns, widen narrow setcc conditions, and convert selects of
/// concat_vectors into a single concat.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0); // condition vector
  SDValue N1 = N->getOperand(1); // value if condition lane is true
  SDValue N2 = N->getOperand(2); // value if condition lane is false
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match "X >= 0 (or > -1) ? X : 0-X" ...
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // ... or the mirrored "X <= 0 ? 0-X : X".
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer the dedicated ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand to the shift/add/xor idiom above.
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullConstantOrNullSplatConstant(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}
7477 
7478 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
7479   SDValue N0 = N->getOperand(0);
7480   SDValue N1 = N->getOperand(1);
7481   SDValue N2 = N->getOperand(2);
7482   SDValue N3 = N->getOperand(3);
7483   SDValue N4 = N->getOperand(4);
7484   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
7485 
7486   // fold select_cc lhs, rhs, x, x, cc -> x
7487   if (N2 == N3)
7488     return N2;
7489 
7490   // Determine if the condition we're dealing with is constant
7491   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
7492                                   CC, SDLoc(N), false)) {
7493     AddToWorklist(SCC.getNode());
7494 
7495     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
7496       if (!SCCC->isNullValue())
7497         return N2;    // cond always true -> true val
7498       else
7499         return N3;    // cond always false -> false val
7500     } else if (SCC->isUndef()) {
7501       // When the condition is UNDEF, just return the first operand. This is
7502       // coherent the DAG creation, no setcc node is created in this case
7503       return N2;
7504     } else if (SCC.getOpcode() == ISD::SETCC) {
7505       // Fold to a simpler select_cc
7506       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
7507                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
7508                          SCC.getOperand(2));
7509     }
7510   }
7511 
7512   // If we can fold this based on the true/false value, do so.
7513   if (SimplifySelectOps(N, N2, N3))
7514     return SDValue(N, 0);  // Don't revisit N.
7515 
7516   // fold select_cc into other things, such as min/max/abs
7517   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
7518 }
7519 
7520 SDValue DAGCombiner::visitSETCC(SDNode *N) {
7521   // setcc is very commonly used as an argument to brcond. This pattern
7522   // also lend itself to numerous combines and, as a result, it is desired
7523   // we keep the argument to a brcond as a setcc as much as possible.
7524   bool PreferSetCC =
7525       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7526 
7527   SDValue Combined = SimplifySetCC(
7528       N->getValueType(0), N->getOperand(0), N->getOperand(1),
7529       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7530 
7531   if (!Combined)
7532     return SDValue();
7533 
7534   // If we prefer to have a setcc, and we don't, we'll try our best to
7535   // recreate one using rebuildSetCC.
7536   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7537     SDValue NewSetCC = rebuildSetCC(Combined);
7538 
7539     // We don't have anything interesting to combine to.
7540     if (NewSetCC.getNode() == N)
7541       return SDValue();
7542 
7543     if (NewSetCC)
7544       return NewSetCC;
7545   }
7546 
7547   return Combined;
7548 }
7549 
7550 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7551   SDValue LHS = N->getOperand(0);
7552   SDValue RHS = N->getOperand(1);
7553   SDValue Carry = N->getOperand(2);
7554   SDValue Cond = N->getOperand(3);
7555 
7556   // If Carry is false, fold to a regular SETCC.
7557   if (isNullConstant(Carry))
7558     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7559 
7560   return SDValue();
7561 }
7562 
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
/// \returns the folded node on success, or nullptr if no fold applies.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  // Let getNode perform the scalar constant fold.
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  // Only fold an all-constant build_vector, and only when doing so will not
  // introduce a build_vector of an illegal scalar type.
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();   // Destination element width.
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); // Source width.
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    // Undef elements remain undef in the extended vector.
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    // Sign-extends replicate the sign bit; zext and aext both zero-fill here.
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts).getNode();
}
7622 
7623 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7624 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7625 // transformation. Returns true if extension are possible and the above
7626 // mentioned transformation is profitable.
7627 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
7628                                     unsigned ExtOpc,
7629                                     SmallVectorImpl<SDNode *> &ExtendNodes,
7630                                     const TargetLowering &TLI) {
7631   bool HasCopyToRegUses = false;
7632   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
7633   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7634                             UE = N0.getNode()->use_end();
7635        UI != UE; ++UI) {
7636     SDNode *User = *UI;
7637     if (User == N)
7638       continue;
7639     if (UI.getUse().getResNo() != N0.getResNo())
7640       continue;
7641     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7642     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7643       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7644       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7645         // Sign bits will be lost after a zext.
7646         return false;
7647       bool Add = false;
7648       for (unsigned i = 0; i != 2; ++i) {
7649         SDValue UseOp = User->getOperand(i);
7650         if (UseOp == N0)
7651           continue;
7652         if (!isa<ConstantSDNode>(UseOp))
7653           return false;
7654         Add = true;
7655       }
7656       if (Add)
7657         ExtendNodes.push_back(User);
7658       continue;
7659     }
7660     // If truncates aren't free and there are users we can't
7661     // extend, it isn't worthwhile.
7662     if (!isTruncFree)
7663       return false;
7664     // Remember if this value is live-out.
7665     if (User->getOpcode() == ISD::CopyToReg)
7666       HasCopyToRegUses = true;
7667   }
7668 
7669   if (HasCopyToRegUses) {
7670     bool BothLiveOut = false;
7671     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
7672          UI != UE; ++UI) {
7673       SDUse &Use = UI.getUse();
7674       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
7675         BothLiveOut = true;
7676         break;
7677       }
7678     }
7679     if (BothLiveOut)
7680       // Both unextended and extended values are live out. There had better be
7681       // a good reason for the transformation.
7682       return ExtendNodes.size();
7683   }
7684   return true;
7685 }
7686 
7687 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
7688                                   SDValue OrigLoad, SDValue ExtLoad,
7689                                   ISD::NodeType ExtType) {
7690   // Extend SetCC uses if necessary.
7691   SDLoc DL(ExtLoad);
7692   for (SDNode *SetCC : SetCCs) {
7693     SmallVector<SDValue, 4> Ops;
7694 
7695     for (unsigned j = 0; j != 2; ++j) {
7696       SDValue SOp = SetCC->getOperand(j);
7697       if (SOp == OrigLoad)
7698         Ops.push_back(ExtLoad);
7699       else
7700         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
7701     }
7702 
7703     Ops.push_back(SetCC->getOperand(2));
7704     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
7705   }
7706 }
7707 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Only plain, unindexed, non-volatile, single-use loads of a power-of-two
  // vector type qualify, and only when the target says the split is desirable.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Check that any other uses of the load can be extended too (collecting
  // SETCC uses that must be rewritten afterwards).
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  // Give up if even the smallest split is not a legal/custom extending load.
  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Byte distance between consecutive split loads in memory.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit NumSplits extending loads at increasing offsets from the base.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of a piece can be no better than the original alignment
    // combined with its offset.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the chains of the split loads and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
7804 
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);

  // and/or/xor
  // The logic op must have a constant RHS and be legal (if we are past
  // legalization) in the wider type.
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  // Likewise, the shift must have a constant amount and be legal in the
  // wider type.
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  // The shifted value must be an unindexed, non-sign-extending load for
  // which a zextload in the wider type is legal.
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  // Both intermediate nodes must be consumed only by this pattern.
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Make sure remaining uses of the load can be zero-extended too; SETCC
  // uses to be rewritten are collected in SetCCs.
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Zero-extend the logic-op constant to the wide type.
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    // Only the chain result has other uses: route it to the new load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    // Other users of the loaded value remain: feed them a truncate of the
    // extended load.
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
7875 
7876 /// If we're narrowing or widening the result of a vector select and the final
7877 /// size is the same size as a setcc (compare) feeding the select, then try to
7878 /// apply the cast operation to the select's operands because matching vector
7879 /// sizes for a select condition and other operands should be more efficient.
7880 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
7881   unsigned CastOpcode = Cast->getOpcode();
7882   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
7883           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
7884           CastOpcode == ISD::FP_ROUND) &&
7885          "Unexpected opcode for vector select narrowing/widening");
7886 
7887   // We only do this transform before legal ops because the pattern may be
7888   // obfuscated by target-specific operations after legalization. Do not create
7889   // an illegal select op, however, because that may be difficult to lower.
7890   EVT VT = Cast->getValueType(0);
7891   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
7892     return SDValue();
7893 
7894   SDValue VSel = Cast->getOperand(0);
7895   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
7896       VSel.getOperand(0).getOpcode() != ISD::SETCC)
7897     return SDValue();
7898 
7899   // Does the setcc have the same vector size as the casted select?
7900   SDValue SetCC = VSel.getOperand(0);
7901   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
7902   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
7903     return SDValue();
7904 
7905   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
7906   SDValue A = VSel.getOperand(1);
7907   SDValue B = VSel.getOperand(2);
7908   SDValue CastA, CastB;
7909   SDLoc DL(Cast);
7910   if (CastOpcode == ISD::FP_ROUND) {
7911     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
7912     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
7913     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
7914   } else {
7915     CastA = DAG.getNode(CastOpcode, DL, VT, A);
7916     CastB = DAG.getNode(CastOpcode, DL, VT, B);
7917   }
7918   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
7919 }
7920 
7921 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
7922 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
7923 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
7924                                      const TargetLowering &TLI, EVT VT,
7925                                      bool LegalOperations, SDNode *N,
7926                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
7927   SDNode *N0Node = N0.getNode();
7928   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
7929                                                    : ISD::isZEXTLoad(N0Node);
7930   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
7931       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
7932     return {};
7933 
7934   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7935   EVT MemVT = LN0->getMemoryVT();
7936   if ((LegalOperations || LN0->isVolatile()) &&
7937       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
7938     return {};
7939 
7940   SDValue ExtLoad =
7941       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
7942                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7943   Combiner.CombineTo(N, ExtLoad);
7944   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
7945   return SDValue(N, 0); // Return N so it doesn't get rechecked!
7946 }
7947 
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  // The source must be a plain, unindexed load; for vectors, volatile loads,
  // or once operations are legalized, the extload must be legal on target.
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        cast<LoadSDNode>(N0)->isVolatile()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  // With multiple uses, the transform is only worthwhile if the other users
  // can be extended too (SETCC users to rewrite are collected in SetCCs).
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  // Vector extloads additionally need the target's blessing.
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    // Only the chain needs rewiring.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  } else {
    // Other users of the narrow value get a truncate of the extended load.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
7989 
7990 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
7991   SDValue N0 = N->getOperand(0);
7992   EVT VT = N->getValueType(0);
7993   SDLoc DL(N);
7994 
7995   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7996                                               LegalOperations))
7997     return SDValue(Res, 0);
7998 
7999   // fold (sext (sext x)) -> (sext x)
8000   // fold (sext (aext x)) -> (sext x)
8001   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8002     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8003 
8004   if (N0.getOpcode() == ISD::TRUNCATE) {
8005     // fold (sext (truncate (load x))) -> (sext (smaller load x))
8006     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8007     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8008       SDNode *oye = N0.getOperand(0).getNode();
8009       if (NarrowLoad.getNode() != N0.getNode()) {
8010         CombineTo(N0.getNode(), NarrowLoad);
8011         // CombineTo deleted the truncate, if needed, but not what's under it.
8012         AddToWorklist(oye);
8013       }
8014       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8015     }
8016 
8017     // See if the value being truncated is already sign extended.  If so, just
8018     // eliminate the trunc/sext pair.
8019     SDValue Op = N0.getOperand(0);
8020     unsigned OpBits   = Op.getScalarValueSizeInBits();
8021     unsigned MidBits  = N0.getScalarValueSizeInBits();
8022     unsigned DestBits = VT.getScalarSizeInBits();
8023     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8024 
8025     if (OpBits == DestBits) {
8026       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
8027       // bits, it is already ready.
8028       if (NumSignBits > DestBits-MidBits)
8029         return Op;
8030     } else if (OpBits < DestBits) {
8031       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
8032       // bits, just sext from i32.
8033       if (NumSignBits > OpBits-MidBits)
8034         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8035     } else {
8036       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
8037       // bits, just truncate to i32.
8038       if (NumSignBits > OpBits-MidBits)
8039         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8040     }
8041 
8042     // fold (sext (truncate x)) -> (sextinreg x).
8043     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8044                                                  N0.getValueType())) {
8045       if (OpBits < DestBits)
8046         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8047       else if (OpBits > DestBits)
8048         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8049       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8050                          DAG.getValueType(N0.getValueType()));
8051     }
8052   }
8053 
8054   // Try to simplify (sext (load x)).
8055   if (SDValue foldedExt =
8056           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8057                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8058     return foldedExt;
8059 
8060   // fold (sext (load x)) to multiple smaller sextloads.
8061   // Only on illegal but splittable vectors.
8062   if (SDValue ExtLoad = CombineExtLoad(N))
8063     return ExtLoad;
8064 
8065   // Try to simplify (sext (sextload x)).
8066   if (SDValue foldedExt = tryToFoldExtOfExtload(
8067           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8068     return foldedExt;
8069 
8070   // fold (sext (and/or/xor (load x), cst)) ->
8071   //      (and/or/xor (sextload x), (sext cst))
8072   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8073        N0.getOpcode() == ISD::XOR) &&
8074       isa<LoadSDNode>(N0.getOperand(0)) &&
8075       N0.getOperand(1).getOpcode() == ISD::Constant &&
8076       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8077     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8078     EVT MemVT = LN00->getMemoryVT();
8079     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8080       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8081       SmallVector<SDNode*, 4> SetCCs;
8082       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8083                                              ISD::SIGN_EXTEND, SetCCs, TLI);
8084       if (DoXform) {
8085         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8086                                          LN00->getChain(), LN00->getBasePtr(),
8087                                          LN00->getMemoryVT(),
8088                                          LN00->getMemOperand());
8089         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8090         Mask = Mask.sext(VT.getSizeInBits());
8091         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8092                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8093         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
8094         bool NoReplaceTruncAnd = !N0.hasOneUse();
8095         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8096         CombineTo(N, And);
8097         // If N0 has multiple uses, change other uses as well.
8098         if (NoReplaceTruncAnd) {
8099           SDValue TruncAnd =
8100               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8101           CombineTo(N0.getNode(), TruncAnd);
8102         }
8103         if (NoReplaceTrunc) {
8104           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8105         } else {
8106           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8107                                       LN00->getValueType(0), ExtLoad);
8108           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8109         }
8110         return SDValue(N,0); // Return N so it doesn't get rechecked!
8111       }
8112     }
8113   }
8114 
8115   if (N0.getOpcode() == ISD::SETCC) {
8116     SDValue N00 = N0.getOperand(0);
8117     SDValue N01 = N0.getOperand(1);
8118     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8119     EVT N00VT = N0.getOperand(0).getValueType();
8120 
8121     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8122     // Only do this before legalize for now.
8123     if (VT.isVector() && !LegalOperations &&
8124         TLI.getBooleanContents(N00VT) ==
8125             TargetLowering::ZeroOrNegativeOneBooleanContent) {
8126       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8127       // of the same size as the compared operands. Only optimize sext(setcc())
8128       // if this is the case.
8129       EVT SVT = getSetCCResultType(N00VT);
8130 
8131       // We know that the # elements of the results is the same as the
8132       // # elements of the compare (and the # elements of the compare result
8133       // for that matter).  Check to see that they are the same size.  If so,
8134       // we know that the element size of the sext'd result matches the
8135       // element size of the compare operands.
8136       if (VT.getSizeInBits() == SVT.getSizeInBits())
8137         return DAG.getSetCC(DL, VT, N00, N01, CC);
8138 
8139       // If the desired elements are smaller or larger than the source
8140       // elements, we can use a matching integer vector type and then
8141       // truncate/sign extend.
8142       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8143       if (SVT == MatchingVecType) {
8144         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8145         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8146       }
8147     }
8148 
8149     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8150     // Here, T can be 1 or -1, depending on the type of the setcc and
8151     // getBooleanContents().
8152     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8153 
8154     // To determine the "true" side of the select, we need to know the high bit
8155     // of the value returned by the setcc if it evaluates to true.
8156     // If the type of the setcc is i1, then the true case of the select is just
8157     // sext(i1 1), that is, -1.
8158     // If the type of the setcc is larger (say, i8) then the value of the high
8159     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8160     // of the appropriate width.
8161     SDValue ExtTrueVal = (SetCCWidth == 1)
8162                              ? DAG.getAllOnesConstant(DL, VT)
8163                              : DAG.getBoolConstant(true, DL, VT, N00VT);
8164     SDValue Zero = DAG.getConstant(0, DL, VT);
8165     if (SDValue SCC =
8166             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8167       return SCC;
8168 
8169     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8170       EVT SetCCVT = getSetCCResultType(N00VT);
8171       // Don't do this transform for i1 because there's a select transform
8172       // that would reverse it.
8173       // TODO: We should not do this transform at all without a target hook
8174       // because a sext is likely cheaper than a select?
8175       if (SetCCVT.getScalarSizeInBits() != 1 &&
8176           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8177         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8178         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8179       }
8180     }
8181   }
8182 
8183   // fold (sext x) -> (zext x) if the sign bit is known zero.
8184   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8185       DAG.SignBitIsZero(N0))
8186     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8187 
8188   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8189     return NewVSel;
8190 
8191   return SDValue();
8192 }
8193 
8194 // isTruncateOf - If N is a truncate of some other value, return true, record
8195 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8196 // This function computes KnownBits to avoid a duplicated call to
8197 // computeKnownBits in the caller.
8198 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8199                          KnownBits &Known) {
8200   if (N->getOpcode() == ISD::TRUNCATE) {
8201     Op = N->getOperand(0);
8202     DAG.computeKnownBits(Op, Known);
8203     return true;
8204   }
8205 
8206   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
8207       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
8208     return false;
8209 
8210   SDValue Op0 = N->getOperand(0);
8211   SDValue Op1 = N->getOperand(1);
8212   assert(Op0.getValueType() == Op1.getValueType());
8213 
8214   if (isNullConstant(Op0))
8215     Op = Op1;
8216   else if (isNullConstant(Op1))
8217     Op = Op0;
8218   else
8219     return false;
8220 
8221   DAG.computeKnownBits(Op, Known);
8222 
8223   if (!(Known.Zero | 1).isAllOnesValue())
8224     return false;
8225 
8226   return true;
8227 }
8228 
// Combine a ZERO_EXTEND node. Tries, in order: constant folding, collapsing
// nested extensions, eliminating or narrowing truncates, folding the extend
// into loads (including splitting illegal-but-splittable vector loads and
// reassociating through logic ops), lowering zext(setcc), and hoisting the
// extend above shifts. Returns the replacement value, or an empty SDValue if
// no combine applies.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1' (also handles constant build_vectors).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  KnownBits Known;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits is the set of bits of Op discarded by the truncate; the
    // fold is only valid when all of them are known zero. If Op and N0 have
    // the same width, nothing is discarded and the mask is empty.
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Widen the AND mask with zeros, matching the zext semantics.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  // NOTE(review): the last clause requires BOTH pre-legalization AND a legal
  // logic op; confirm the '&&' (rather than '||') is intentional.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          // If the AND would itself match as a zextload, keep it so its other
          // users can share that load.
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          // The load value feeds only the logic op: just rewire the chain.
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users still need the original narrow value: truncate the
          // extended load for them.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // If the setcc already produces its preferred result type, give up on
      // all remaining combines for this node, not just this one.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      // Masking with 1 keeps only the low bit, giving zext semantics even
      // though the resize itself sign extends.
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // fold (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  // fold (zext (srl (zext x), cst)) -> (srl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // Note: this bails out of the whole visit, not just this fold.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    // (A value of 256 bits or more needs a shift amount the default
    // shift-amount type may not represent, so widen the amount to i32.)
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
8476 
// Combine an ANY_EXTEND node. Tries constant folding, collapsing nested
// extensions, eliminating truncates, folding the extend into (possibly
// already extending) loads, and lowering aext(setcc). Returns the
// replacement value, or an empty SDValue if no combine applies.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1' (also handles constant build_vectors).
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  // The high bits of an any_extend are unspecified, so the bits the truncate
  // discards need not be preserved.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Widen the mask with zeros so the AND still clears the same high bits.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple users, only transform if all of them can be fed from the
    // extended load as well.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        // Other users still need the narrow value: truncate the ext load.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-emit the existing extending load directly at the wider type.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      // If the setcc already produces its preferred result type, give up on
      // all remaining combines for this node, not just this one.
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
8622 
8623 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
8624   unsigned Opcode = N->getOpcode();
8625   SDValue N0 = N->getOperand(0);
8626   SDValue N1 = N->getOperand(1);
8627   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
8628 
8629   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
8630   if (N0.getOpcode() == Opcode &&
8631       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
8632     return N0;
8633 
8634   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
8635       N0.getOperand(0).getOpcode() == Opcode) {
8636     // We have an assert, truncate, assert sandwich. Make one stronger assert
8637     // by asserting on the smallest asserted type to the larger source type.
8638     // This eliminates the later assert:
8639     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
8640     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
8641     SDValue BigA = N0.getOperand(0);
8642     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
8643     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
8644            "Asserting zero/sign-extended bits to a type larger than the "
8645            "truncated destination does not provide information");
8646 
8647     SDLoc DL(N);
8648     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
8649     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
8650     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
8651                                     BigA.getOperand(0), MinAssertVTVal);
8652     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
8653   }
8654 
8655   return SDValue();
8656 }
8657 
8658 /// If the result of a wider load is shifted to right of N  bits and then
8659 /// truncated to a narrower type and where N is a multiple of number of bits of
8660 /// the narrower type, transform it to a narrower load from address + N / num of
8661 /// bits of new type. Also narrow the load if the result is masked with an AND
8662 /// to effectively produce a smaller type. If the result is to be extended, also
8663 /// fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // ExtVT is the narrow type actually read from memory; VT is the final
  // result type of N.
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Treat the SRL node itself as the value being narrowed; the SRL handling
    // further below strips the shift.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    // Use the widest type the memory access still covers after the shift;
    // for sextloads (or shifts past the memory width) fall back to the bits
    // remaining in the result.
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC || !AndC->getAPIntValue().isMask())
      return SDValue();

    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // If N0 (possibly N itself, for the SRL case above) is a right-shifted
  // load, fold the shift into the narrowed load's byte offset.
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of ExtVT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of ExtVT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      // (SRL has exactly one use, checked above, so this is that user.)
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Compute the new base address: the byte offset of the narrow value within
  // the originally loaded value.
  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  // Issue the narrower (possibly extending) load at the adjusted address.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}
8837 
// Combine a SIGN_EXTEND_INREG node. Tries, in order: trivial folds (undef,
// constant, already-sign-extended input), rewriting the in-register extend
// as a cheaper extend or shift, narrowing/re-typing loads that feed it, and
// recognizing a bswap pattern in the operand. The fold order matters: each
// later fold assumes the earlier, cheaper ones did not apply.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately shadows the EVT type name; it is the type
  // being sign-extended *from*, carried as a VTSDNode operand.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  // (getNode constant-folds this for constant operands.)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  // VTBits-EVTBits+1 sign bits means bit EVTBits-1 already equals every bit
  // above it, so the extend is a no-op.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  // The source element width must match EVT exactly so the sign bit being
  // extended is the source's own sign bit.
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough. After the SRL, VTBits-(ShAmt+EVTBits) bits above
        // the extended field came from X's top bits; if X has more sign bits
        // than that, SRA produces the same value as SRL+sext_in_reg.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    // Replace both the sext_in_reg and the original load (value + chain).
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
8967 
8968 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
8969   SDValue N0 = N->getOperand(0);
8970   EVT VT = N->getValueType(0);
8971 
8972   if (N0.isUndef())
8973     return DAG.getUNDEF(VT);
8974 
8975   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8976                                               LegalOperations))
8977     return SDValue(Res, 0);
8978 
8979   return SDValue();
8980 }
8981 
8982 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
8983   SDValue N0 = N->getOperand(0);
8984   EVT VT = N->getValueType(0);
8985 
8986   if (N0.isUndef())
8987     return DAG.getUNDEF(VT);
8988 
8989   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8990                                               LegalOperations))
8991     return SDValue(Res, 0);
8992 
8993   return SDValue();
8994 }
8995 
// Combine a TRUNCATE node. Performs a long series of independent folds:
// no-op/trivial truncates, truncate-of-extend cancellation, narrowing of
// vector extracts/selects/shifts/build_vectors through the truncate,
// demanded-bits simplification, load narrowing, and distribution over
// concat_vectors and carry-producing adds. Each fold returns immediately
// when it fires; the order is significant.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  // (getNode constant-folds; guard against it handing back N itself, which
  // would look like a CombineTo to the caller.)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Re-view the source vector with SizeRatio-times more (narrower) elements.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The truncated (low) part of the wide element is the first narrow
      // sub-element on little-endian, the last on big-endian.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known;
    DAG.computeKnownBits(Amt, Known);
    unsigned Size = VT.getScalarSizeInBits();
    // Only safe if the shift amount is provably < Size, so no bits that
    // survive the truncate are shifted in from above it.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th operand; those are the low parts that
      // survive the truncate.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> (trunc y)
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      // Record the truncated type of each concat operand position.
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The low (kept) bits live in element 0 on little-endian, in the last
      // element on big-endian.
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
9268 
9269 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9270   SDValue Elt = N->getOperand(i);
9271   if (Elt.getOpcode() != ISD::MERGE_VALUES)
9272     return Elt.getNode();
9273   return Elt.getOperand(Elt.getResNo()).getNode();
9274 }
9275 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // After the swap, LD1 is the load at the lower address. Both loads must be
  // plain (non-extending), single-use, and in the same address space.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must start exactly LD1Bytes past LD1, i.e. the loads are adjacent
  // and non-volatile.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only combine if the wide load would not need a higher alignment than
    // the original and wide loads of VT are legal (or we may still legalize).
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
9309 
9310 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
9311   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
9312   // and Lo parts; on big-endian machines it doesn't.
9313   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
9314 }
9315 
9316 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
9317                                     const TargetLowering &TLI) {
9318   // If this is not a bitcast to an FP type or if the target doesn't have
9319   // IEEE754-compliant FP logic, we're done.
9320   EVT VT = N->getValueType(0);
9321   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
9322     return SDValue();
9323 
9324   // TODO: Use splat values for the constant-checking below and remove this
9325   // restriction.
9326   SDValue N0 = N->getOperand(0);
9327   EVT SourceVT = N0.getValueType();
9328   if (SourceVT.isVector())
9329     return SDValue();
9330 
9331   unsigned FPOpcode;
9332   APInt SignMask;
9333   switch (N0.getOpcode()) {
9334   case ISD::AND:
9335     FPOpcode = ISD::FABS;
9336     SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
9337     break;
9338   case ISD::XOR:
9339     FPOpcode = ISD::FNEG;
9340     SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
9341     break;
9342   // TODO: ISD::OR --> ISD::FNABS?
9343   default:
9344     return SDValue();
9345   }
9346 
9347   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
9348   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
9349   SDValue LogicOp0 = N0.getOperand(0);
9350   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9351   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
9352       LogicOp0.getOpcode() == ISD::BITCAST &&
9353       LogicOp0->getOperand(0).getValueType() == VT)
9354     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
9355 
9356   return SDValue();
9357 }
9358 
9359 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
9360   SDValue N0 = N->getOperand(0);
9361   EVT VT = N->getValueType(0);
9362 
9363   if (N0.isUndef())
9364     return DAG.getUNDEF(VT);
9365 
9366   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
9367   // Only do this before legalize, since afterward the target may be depending
9368   // on the bitconvert.
9369   // First check to see if this is all constant.
9370   if (!LegalTypes &&
9371       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
9372       VT.isVector()) {
9373     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
9374 
9375     EVT DestEltVT = N->getValueType(0).getVectorElementType();
9376     assert(!DestEltVT.isVector() &&
9377            "Element type of vector ValueType must not be vector!");
9378     if (isSimple)
9379       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
9380   }
9381 
9382   // If the input is a constant, let getNode fold it.
9383   // We always need to check that this is just a fp -> int or int -> conversion
9384   // otherwise we will get back N which will confuse the caller into thinking
9385   // we used CombineTo. This can block target combines from running. If we can't
9386   // allowed legal operations, we need to ensure the resulting operation will be
9387   // legal.
9388   // TODO: Maybe we should check that the return value isn't N explicitly?
9389   if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
9390        (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
9391       (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
9392        (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
9393     return DAG.getBitcast(VT, N0);
9394 
9395   // (conv (conv x, t1), t2) -> (conv x, t2)
9396   if (N0.getOpcode() == ISD::BITCAST)
9397     return DAG.getBitcast(VT, N0.getOperand(0));
9398 
9399   // fold (conv (load x)) -> (load (conv*)x)
9400   // If the resultant load doesn't need a higher alignment than the original!
9401   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9402       // Do not change the width of a volatile load.
9403       !cast<LoadSDNode>(N0)->isVolatile() &&
9404       // Do not remove the cast if the types differ in endian layout.
9405       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
9406           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
9407       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
9408       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
9409     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9410     unsigned OrigAlign = LN0->getAlignment();
9411 
9412     bool Fast = false;
9413     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9414                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
9415         Fast) {
9416       SDValue Load =
9417           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
9418                       LN0->getPointerInfo(), OrigAlign,
9419                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9420       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9421       return Load;
9422     }
9423   }
9424 
9425   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
9426     return V;
9427 
9428   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
9429   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
9430   //
9431   // For ppc_fp128:
9432   // fold (bitcast (fneg x)) ->
9433   //     flipbit = signbit
9434   //     (xor (bitcast x) (build_pair flipbit, flipbit))
9435   //
9436   // fold (bitcast (fabs x)) ->
9437   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
9438   //     (xor (bitcast x) (build_pair flipbit, flipbit))
9439   // This often reduces constant pool loads.
9440   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
9441        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
9442       N0.getNode()->hasOneUse() && VT.isInteger() &&
9443       !VT.isVector() && !N0.getValueType().isVector()) {
9444     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
9445     AddToWorklist(NewConv.getNode());
9446 
9447     SDLoc DL(N);
9448     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9449       assert(VT.getSizeInBits() == 128);
9450       SDValue SignBit = DAG.getConstant(
9451           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
9452       SDValue FlipBit;
9453       if (N0.getOpcode() == ISD::FNEG) {
9454         FlipBit = SignBit;
9455         AddToWorklist(FlipBit.getNode());
9456       } else {
9457         assert(N0.getOpcode() == ISD::FABS);
9458         SDValue Hi =
9459             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
9460                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9461                                               SDLoc(NewConv)));
9462         AddToWorklist(Hi.getNode());
9463         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
9464         AddToWorklist(FlipBit.getNode());
9465       }
9466       SDValue FlipBits =
9467           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9468       AddToWorklist(FlipBits.getNode());
9469       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
9470     }
9471     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9472     if (N0.getOpcode() == ISD::FNEG)
9473       return DAG.getNode(ISD::XOR, DL, VT,
9474                          NewConv, DAG.getConstant(SignBit, DL, VT));
9475     assert(N0.getOpcode() == ISD::FABS);
9476     return DAG.getNode(ISD::AND, DL, VT,
9477                        NewConv, DAG.getConstant(~SignBit, DL, VT));
9478   }
9479 
9480   // fold (bitconvert (fcopysign cst, x)) ->
9481   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
9482   // Note that we don't handle (copysign x, cst) because this can always be
9483   // folded to an fneg or fabs.
9484   //
9485   // For ppc_fp128:
9486   // fold (bitcast (fcopysign cst, x)) ->
9487   //     flipbit = (and (extract_element
9488   //                     (xor (bitcast cst), (bitcast x)), 0),
9489   //                    signbit)
9490   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
9491   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
9492       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
9493       VT.isInteger() && !VT.isVector()) {
9494     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
9495     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
9496     if (isTypeLegal(IntXVT)) {
9497       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
9498       AddToWorklist(X.getNode());
9499 
9500       // If X has a different width than the result/lhs, sext it or truncate it.
9501       unsigned VTWidth = VT.getSizeInBits();
9502       if (OrigXWidth < VTWidth) {
9503         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
9504         AddToWorklist(X.getNode());
9505       } else if (OrigXWidth > VTWidth) {
9506         // To get the sign bit in the right place, we have to shift it right
9507         // before truncating.
9508         SDLoc DL(X);
9509         X = DAG.getNode(ISD::SRL, DL,
9510                         X.getValueType(), X,
9511                         DAG.getConstant(OrigXWidth-VTWidth, DL,
9512                                         X.getValueType()));
9513         AddToWorklist(X.getNode());
9514         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
9515         AddToWorklist(X.getNode());
9516       }
9517 
9518       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9519         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
9520         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9521         AddToWorklist(Cst.getNode());
9522         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
9523         AddToWorklist(X.getNode());
9524         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
9525         AddToWorklist(XorResult.getNode());
9526         SDValue XorResult64 = DAG.getNode(
9527             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
9528             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9529                                   SDLoc(XorResult)));
9530         AddToWorklist(XorResult64.getNode());
9531         SDValue FlipBit =
9532             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
9533                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
9534         AddToWorklist(FlipBit.getNode());
9535         SDValue FlipBits =
9536             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9537         AddToWorklist(FlipBits.getNode());
9538         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
9539       }
9540       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9541       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
9542                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
9543       AddToWorklist(X.getNode());
9544 
9545       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9546       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
9547                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
9548       AddToWorklist(Cst.getNode());
9549 
9550       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
9551     }
9552   }
9553 
9554   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
9555   if (N0.getOpcode() == ISD::BUILD_PAIR)
9556     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
9557       return CombineLD;
9558 
9559   // Remove double bitcasts from shuffles - this is often a legacy of
9560   // XformToShuffleWithZero being used to combine bitmaskings (of
9561   // float vectors bitcast to integer vectors) into shuffles.
9562   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
9563   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
9564       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
9565       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
9566       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
9567     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
9568 
9569     // If operands are a bitcast, peek through if it casts the original VT.
9570     // If operands are a constant, just bitcast back to original VT.
9571     auto PeekThroughBitcast = [&](SDValue Op) {
9572       if (Op.getOpcode() == ISD::BITCAST &&
9573           Op.getOperand(0).getValueType() == VT)
9574         return SDValue(Op.getOperand(0));
9575       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
9576           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
9577         return DAG.getBitcast(VT, Op);
9578       return SDValue();
9579     };
9580 
9581     // FIXME: If either input vector is bitcast, try to convert the shuffle to
9582     // the result type of this bitcast. This would eliminate at least one
9583     // bitcast. See the transform in InstCombine.
9584     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
9585     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
9586     if (!(SV0 && SV1))
9587       return SDValue();
9588 
9589     int MaskScale =
9590         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
9591     SmallVector<int, 8> NewMask;
9592     for (int M : SVN->getMask())
9593       for (int i = 0; i != MaskScale; ++i)
9594         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
9595 
9596     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9597     if (!LegalMask) {
9598       std::swap(SV0, SV1);
9599       ShuffleVectorSDNode::commuteMask(NewMask);
9600       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9601     }
9602 
9603     if (LegalMask)
9604       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
9605   }
9606 
9607   return SDValue();
9608 }
9609 
9610 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
9611   EVT VT = N->getValueType(0);
9612   return CombineConsecutiveLoads(N, VT);
9613 }
9614 
9615 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
9616 /// operands. DstEltVT indicates the destination element value type.
9617 SDValue DAGCombiner::
9618 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
9619   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
9620 
9621   // If this is already the right type, we're done.
9622   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
9623 
9624   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
9625   unsigned DstBitSize = DstEltVT.getSizeInBits();
9626 
9627   // If this is a conversion of N elements of one type to N elements of another
9628   // type, convert each element.  This handles FP<->INT cases.
9629   if (SrcBitSize == DstBitSize) {
9630     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9631                               BV->getValueType(0).getVectorNumElements());
9632 
9633     // Due to the FP element handling below calling this routine recursively,
9634     // we can end up with a scalar-to-vector node here.
9635     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
9636       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
9637                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
9638 
9639     SmallVector<SDValue, 8> Ops;
9640     for (SDValue Op : BV->op_values()) {
9641       // If the vector element type is not legal, the BUILD_VECTOR operands
9642       // are promoted and implicitly truncated.  Make that explicit here.
9643       if (Op.getValueType() != SrcEltVT)
9644         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
9645       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
9646       AddToWorklist(Ops.back().getNode());
9647     }
9648     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
9649   }
9650 
9651   // Otherwise, we're growing or shrinking the elements.  To avoid having to
9652   // handle annoying details of growing/shrinking FP values, we convert them to
9653   // int first.
9654   if (SrcEltVT.isFloatingPoint()) {
9655     // Convert the input float vector to a int vector where the elements are the
9656     // same sizes.
9657     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
9658     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
9659     SrcEltVT = IntVT;
9660   }
9661 
9662   // Now we know the input is an integer vector.  If the output is a FP type,
9663   // convert to integer first, then to FP of the right size.
9664   if (DstEltVT.isFloatingPoint()) {
9665     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
9666     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
9667 
9668     // Next, convert to FP elements of the same size.
9669     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
9670   }
9671 
9672   SDLoc DL(BV);
9673 
9674   // Okay, we know the src/dst types are both integers of differing types.
9675   // Handling growing first.
9676   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
9677   if (SrcBitSize < DstBitSize) {
9678     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
9679 
9680     SmallVector<SDValue, 8> Ops;
9681     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
9682          i += NumInputsPerOutput) {
9683       bool isLE = DAG.getDataLayout().isLittleEndian();
9684       APInt NewBits = APInt(DstBitSize, 0);
9685       bool EltIsUndef = true;
9686       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
9687         // Shift the previously computed bits over.
9688         NewBits <<= SrcBitSize;
9689         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
9690         if (Op.isUndef()) continue;
9691         EltIsUndef = false;
9692 
9693         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
9694                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
9695       }
9696 
9697       if (EltIsUndef)
9698         Ops.push_back(DAG.getUNDEF(DstEltVT));
9699       else
9700         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
9701     }
9702 
9703     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
9704     return DAG.getBuildVector(VT, DL, Ops);
9705   }
9706 
9707   // Finally, this must be the case where we are shrinking elements: each input
9708   // turns into multiple outputs.
9709   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
9710   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
9711                             NumOutputsPerInput*BV->getNumOperands());
9712   SmallVector<SDValue, 8> Ops;
9713 
9714   for (const SDValue &Op : BV->op_values()) {
9715     if (Op.isUndef()) {
9716       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
9717       continue;
9718     }
9719 
9720     APInt OpVal = cast<ConstantSDNode>(Op)->
9721                   getAPIntValue().zextOrTrunc(SrcBitSize);
9722 
9723     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
9724       APInt ThisVal = OpVal.trunc(DstBitSize);
9725       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
9726       OpVal.lshrInPlace(DstBitSize);
9727     }
9728 
9729     // For big endian targets, swap the order of the pieces of each element.
9730     if (DAG.getDataLayout().isBigEndian())
9731       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
9732   }
9733 
9734   return DAG.getBuildVector(VT, DL, Ops);
9735 }
9736 
9737 static bool isContractable(SDNode *N) {
9738   SDNodeFlags F = N->getFlags();
9739   return F.hasAllowContract() || F.hasAllowReassociation();
9740 }
9741 
/// Try to perform FMA combining on a given FADD node.
///
/// Folds (fadd (fmul x, y), z) and related patterns — including forms seen
/// through FP_EXTEND and patterns with an FMA/FMAD already on one side —
/// into the target's preferred fused multiply-add opcode. Folds are tried
/// in order and the first legal match is returned; ordering matters.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  // Fusion is allowed for everything under -ffp-contract=fast /
  // unsafe-fp-math, when this FADD itself is contractable, or when the
  // target exposes FMAD (which rounds the intermediate product anyway).
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some targets prefer to form FMAs later, in the machine combiner.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }


    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  // No pattern matched; leave the FADD alone.
  return SDValue();
}
9953 
9954 /// Try to perform FMA combining on a given FSUB node.
9955 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
9956   SDValue N0 = N->getOperand(0);
9957   SDValue N1 = N->getOperand(1);
9958   EVT VT = N->getValueType(0);
9959   SDLoc SL(N);
9960 
9961   const TargetOptions &Options = DAG.getTarget().Options;
9962   // Floating-point multiply-add with intermediate rounding.
9963   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
9964 
9965   // Floating-point multiply-add without intermediate rounding.
9966   bool HasFMA =
9967       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
9968       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
9969 
9970   // No valid opcode, do not combine.
9971   if (!HasFMAD && !HasFMA)
9972     return SDValue();
9973 
9974   const SDNodeFlags Flags = N->getFlags();
9975   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
9976   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
9977                               CanFuse || HasFMAD);
9978 
9979   // If the subtraction is not contractable, do not combine.
9980   if (!AllowFusionGlobally && !isContractable(N))
9981     return SDValue();
9982 
9983   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
9984   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
9985     return SDValue();
9986 
9987   // Always prefer FMAD to FMA for precision.
9988   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
9989   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
9990 
9991   // Is the node an FMUL and contractable either due to global flags or
9992   // SDNodeFlags.
9993   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
9994     if (N.getOpcode() != ISD::FMUL)
9995       return false;
9996     return AllowFusionGlobally || isContractable(N.getNode());
9997   };
9998 
9999   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10000   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10001     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10002                        N0.getOperand(0), N0.getOperand(1),
10003                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10004   }
10005 
10006   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10007   // Note: Commutes FSUB operands.
10008   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10009     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10010                        DAG.getNode(ISD::FNEG, SL, VT,
10011                                    N1.getOperand(0)),
10012                        N1.getOperand(1), N0, Flags);
10013   }
10014 
10015   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10016   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10017       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10018     SDValue N00 = N0.getOperand(0).getOperand(0);
10019     SDValue N01 = N0.getOperand(0).getOperand(1);
10020     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10021                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10022                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10023   }
10024 
10025   // Look through FP_EXTEND nodes to do more combining.
10026 
10027   // fold (fsub (fpext (fmul x, y)), z)
10028   //   -> (fma (fpext x), (fpext y), (fneg z))
10029   if (N0.getOpcode() == ISD::FP_EXTEND) {
10030     SDValue N00 = N0.getOperand(0);
10031     if (isContractableFMUL(N00) &&
10032         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10033       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10034                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10035                                      N00.getOperand(0)),
10036                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10037                                      N00.getOperand(1)),
10038                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10039     }
10040   }
10041 
10042   // fold (fsub x, (fpext (fmul y, z)))
10043   //   -> (fma (fneg (fpext y)), (fpext z), x)
10044   // Note: Commutes FSUB operands.
10045   if (N1.getOpcode() == ISD::FP_EXTEND) {
10046     SDValue N10 = N1.getOperand(0);
10047     if (isContractableFMUL(N10) &&
10048         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10049       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10050                          DAG.getNode(ISD::FNEG, SL, VT,
10051                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
10052                                                  N10.getOperand(0))),
10053                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10054                                      N10.getOperand(1)),
10055                          N0, Flags);
10056     }
10057   }
10058 
10059   // fold (fsub (fpext (fneg (fmul, x, y))), z)
10060   //   -> (fneg (fma (fpext x), (fpext y), z))
10061   // Note: This could be removed with appropriate canonicalization of the
10062   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10063   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10064   // from implementing the canonicalization in visitFSUB.
10065   if (N0.getOpcode() == ISD::FP_EXTEND) {
10066     SDValue N00 = N0.getOperand(0);
10067     if (N00.getOpcode() == ISD::FNEG) {
10068       SDValue N000 = N00.getOperand(0);
10069       if (isContractableFMUL(N000) &&
10070           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10071         return DAG.getNode(ISD::FNEG, SL, VT,
10072                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10073                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10074                                                    N000.getOperand(0)),
10075                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10076                                                    N000.getOperand(1)),
10077                                        N1, Flags));
10078       }
10079     }
10080   }
10081 
10082   // fold (fsub (fneg (fpext (fmul, x, y))), z)
10083   //   -> (fneg (fma (fpext x)), (fpext y), z)
10084   // Note: This could be removed with appropriate canonicalization of the
10085   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10086   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10087   // from implementing the canonicalization in visitFSUB.
10088   if (N0.getOpcode() == ISD::FNEG) {
10089     SDValue N00 = N0.getOperand(0);
10090     if (N00.getOpcode() == ISD::FP_EXTEND) {
10091       SDValue N000 = N00.getOperand(0);
10092       if (isContractableFMUL(N000) &&
10093           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10094         return DAG.getNode(ISD::FNEG, SL, VT,
10095                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10096                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10097                                                    N000.getOperand(0)),
10098                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10099                                                    N000.getOperand(1)),
10100                                        N1, Flags));
10101       }
10102     }
10103   }
10104 
10105   // More folding opportunities when target permits.
10106   if (Aggressive) {
10107     // fold (fsub (fma x, y, (fmul u, v)), z)
10108     //   -> (fma x, y (fma u, v, (fneg z)))
10109     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10110         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10111         N0.getOperand(2)->hasOneUse()) {
10112       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10113                          N0.getOperand(0), N0.getOperand(1),
10114                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10115                                      N0.getOperand(2).getOperand(0),
10116                                      N0.getOperand(2).getOperand(1),
10117                                      DAG.getNode(ISD::FNEG, SL, VT,
10118                                                  N1), Flags), Flags);
10119     }
10120 
10121     // fold (fsub x, (fma y, z, (fmul u, v)))
10122     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
10123     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10124         isContractableFMUL(N1.getOperand(2))) {
10125       SDValue N20 = N1.getOperand(2).getOperand(0);
10126       SDValue N21 = N1.getOperand(2).getOperand(1);
10127       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10128                          DAG.getNode(ISD::FNEG, SL, VT,
10129                                      N1.getOperand(0)),
10130                          N1.getOperand(1),
10131                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10132                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
10133                                      N21, N0, Flags), Flags);
10134     }
10135 
10136 
10137     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10138     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
10139     if (N0.getOpcode() == PreferredFusedOpcode) {
10140       SDValue N02 = N0.getOperand(2);
10141       if (N02.getOpcode() == ISD::FP_EXTEND) {
10142         SDValue N020 = N02.getOperand(0);
10143         if (isContractableFMUL(N020) &&
10144             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10145           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10146                              N0.getOperand(0), N0.getOperand(1),
10147                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10148                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10149                                                      N020.getOperand(0)),
10150                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10151                                                      N020.getOperand(1)),
10152                                          DAG.getNode(ISD::FNEG, SL, VT,
10153                                                      N1), Flags), Flags);
10154         }
10155       }
10156     }
10157 
10158     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10159     //   -> (fma (fpext x), (fpext y),
10160     //           (fma (fpext u), (fpext v), (fneg z)))
10161     // FIXME: This turns two single-precision and one double-precision
10162     // operation into two double-precision operations, which might not be
10163     // interesting for all targets, especially GPUs.
10164     if (N0.getOpcode() == ISD::FP_EXTEND) {
10165       SDValue N00 = N0.getOperand(0);
10166       if (N00.getOpcode() == PreferredFusedOpcode) {
10167         SDValue N002 = N00.getOperand(2);
10168         if (isContractableFMUL(N002) &&
10169             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10170           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10171                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10172                                          N00.getOperand(0)),
10173                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10174                                          N00.getOperand(1)),
10175                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10176                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10177                                                      N002.getOperand(0)),
10178                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10179                                                      N002.getOperand(1)),
10180                                          DAG.getNode(ISD::FNEG, SL, VT,
10181                                                      N1), Flags), Flags);
10182         }
10183       }
10184     }
10185 
10186     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
10187     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
10188     if (N1.getOpcode() == PreferredFusedOpcode &&
10189         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
10190       SDValue N120 = N1.getOperand(2).getOperand(0);
10191       if (isContractableFMUL(N120) &&
10192           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10193         SDValue N1200 = N120.getOperand(0);
10194         SDValue N1201 = N120.getOperand(1);
10195         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10196                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
10197                            N1.getOperand(1),
10198                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10199                                        DAG.getNode(ISD::FNEG, SL, VT,
10200                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10201                                                                VT, N1200)),
10202                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10203                                                    N1201),
10204                                        N0, Flags), Flags);
10205       }
10206     }
10207 
10208     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
10209     //   -> (fma (fneg (fpext y)), (fpext z),
10210     //           (fma (fneg (fpext u)), (fpext v), x))
10211     // FIXME: This turns two single-precision and one double-precision
10212     // operation into two double-precision operations, which might not be
10213     // interesting for all targets, especially GPUs.
10214     if (N1.getOpcode() == ISD::FP_EXTEND &&
10215         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
10216       SDValue CvtSrc = N1.getOperand(0);
10217       SDValue N100 = CvtSrc.getOperand(0);
10218       SDValue N101 = CvtSrc.getOperand(1);
10219       SDValue N102 = CvtSrc.getOperand(2);
10220       if (isContractableFMUL(N102) &&
10221           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
10222         SDValue N1020 = N102.getOperand(0);
10223         SDValue N1021 = N102.getOperand(1);
10224         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10225                            DAG.getNode(ISD::FNEG, SL, VT,
10226                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10227                                                    N100)),
10228                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
10229                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10230                                        DAG.getNode(ISD::FNEG, SL, VT,
10231                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10232                                                                VT, N1020)),
10233                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10234                                                    N1021),
10235                                        N0, Flags), Flags);
10236       }
10237     }
10238   }
10239 
10240   return SDValue();
10241 }
10242 
10243 /// Try to perform FMA combining on a given FMUL node based on the distributive
10244 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
10245 /// subtraction instead of addition).
10246 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
10247   SDValue N0 = N->getOperand(0);
10248   SDValue N1 = N->getOperand(1);
10249   EVT VT = N->getValueType(0);
10250   SDLoc SL(N);
10251   const SDNodeFlags Flags = N->getFlags();
10252 
10253   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
10254 
10255   const TargetOptions &Options = DAG.getTarget().Options;
10256 
10257   // The transforms below are incorrect when x == 0 and y == inf, because the
10258   // intermediate multiplication produces a nan.
10259   if (!Options.NoInfsFPMath)
10260     return SDValue();
10261 
10262   // Floating-point multiply-add without intermediate rounding.
10263   bool HasFMA =
10264       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
10265       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10266       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10267 
10268   // Floating-point multiply-add with intermediate rounding. This can result
10269   // in a less precise result due to the changed rounding order.
10270   bool HasFMAD = Options.UnsafeFPMath &&
10271                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10272 
10273   // No valid opcode, do not combine.
10274   if (!HasFMAD && !HasFMA)
10275     return SDValue();
10276 
10277   // Always prefer FMAD to FMA for precision.
10278   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10279   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10280 
10281   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
10282   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
10283   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10284     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
10285       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10286       if (XC1 && XC1->isExactlyValue(+1.0))
10287         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10288                            Y, Flags);
10289       if (XC1 && XC1->isExactlyValue(-1.0))
10290         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10291                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10292     }
10293     return SDValue();
10294   };
10295 
10296   if (SDValue FMA = FuseFADD(N0, N1, Flags))
10297     return FMA;
10298   if (SDValue FMA = FuseFADD(N1, N0, Flags))
10299     return FMA;
10300 
10301   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
10302   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
10303   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
10304   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
10305   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10306     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
10307       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
10308       if (XC0 && XC0->isExactlyValue(+1.0))
10309         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10310                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10311                            Y, Flags);
10312       if (XC0 && XC0->isExactlyValue(-1.0))
10313         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10314                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10315                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10316 
10317       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10318       if (XC1 && XC1->isExactlyValue(+1.0))
10319         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10320                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10321       if (XC1 && XC1->isExactlyValue(-1.0))
10322         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10323                            Y, Flags);
10324     }
10325     return SDValue();
10326   };
10327 
10328   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
10329     return FMA;
10330   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
10331     return FMA;
10332 
10333   return SDValue();
10334 }
10335 
10336 static bool isFMulNegTwo(SDValue &N) {
10337   if (N.getOpcode() != ISD::FMUL)
10338     return false;
10339   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
10340     return CFP->isExactlyValue(-2.0);
10341   return false;
10342 }
10343 
/// Simplify an FADD node: constant folding, canonicalization, conversion to
/// FSUB/FMUL where profitable, and finally FADD -> FMA fusion.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // NOTE(review): the "== 2" result of isNegatibleForFree appears to mean the
  // negation is free/cheaper -- confirm against its definition.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);

  // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
  // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
  if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
      (isFMulNegTwo(N1) && N1.hasOneUse())) {
    bool N1IsFMul = isFMulNegTwo(N1);
    // B is operand 0 of whichever side is the (fmul B, -2.0).
    SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
  }

  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
  if (N1C && N1C->isZero()) {
    // Adding -0.0 is always an identity; adding +0.0 is an identity only
    // when signed zeros are ignorable, because -0.0 + 0.0 == +0.0.
    if (N1C->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      // fold (fadd A, 0) -> A
      return N0;
    }
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If 'unsafe math' or nnan is enabled, fold lots of things.
  // (x + -x == 0 is false when x is NaN or Inf, hence the gating.)
  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if ((Options.UnsafeFPMath ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        // CFP00/CFP01: whether the inner multiply's operands are constants.
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        // Mirror of the block above with the fmul on the RHS.
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
10513 
10514 SDValue DAGCombiner::visitFSUB(SDNode *N) {
10515   SDValue N0 = N->getOperand(0);
10516   SDValue N1 = N->getOperand(1);
10517   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10518   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10519   EVT VT = N->getValueType(0);
10520   SDLoc DL(N);
10521   const TargetOptions &Options = DAG.getTarget().Options;
10522   const SDNodeFlags Flags = N->getFlags();
10523 
10524   // fold vector ops
10525   if (VT.isVector())
10526     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10527       return FoldedVOp;
10528 
10529   // fold (fsub c1, c2) -> c1-c2
10530   if (N0CFP && N1CFP)
10531     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
10532 
10533   if (SDValue NewSel = foldBinOpIntoSelect(N))
10534     return NewSel;
10535 
10536   // (fsub A, 0) -> A
10537   if (N1CFP && N1CFP->isZero()) {
10538     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
10539         Flags.hasNoSignedZeros()) {
10540       return N0;
10541     }
10542   }
10543 
10544   if (N0 == N1) {
10545     // (fsub x, x) -> 0.0
10546     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
10547       return DAG.getConstantFP(0.0f, DL, VT);
10548   }
10549 
10550   // (fsub 0, B) -> -B
10551   if (N0CFP && N0CFP->isZero()) {
10552     if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
10553       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10554         return GetNegatedExpression(N1, DAG, LegalOperations);
10555       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10556         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
10557     }
10558   }
10559 
10560   // fold (fsub A, (fneg B)) -> (fadd A, B)
10561   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10562     return DAG.getNode(ISD::FADD, DL, VT, N0,
10563                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10564 
10565   // If 'unsafe math' is enabled, fold lots of things.
10566   if (Options.UnsafeFPMath) {
10567     // (fsub x, (fadd x, y)) -> (fneg y)
10568     // (fsub x, (fadd y, x)) -> (fneg y)
10569     if (N1.getOpcode() == ISD::FADD) {
10570       SDValue N10 = N1->getOperand(0);
10571       SDValue N11 = N1->getOperand(1);
10572 
10573       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
10574         return GetNegatedExpression(N11, DAG, LegalOperations);
10575 
10576       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
10577         return GetNegatedExpression(N10, DAG, LegalOperations);
10578     }
10579   }
10580 
10581   // FSUB -> FMA combines:
10582   if (SDValue Fused = visitFSUBForFMACombine(N)) {
10583     AddToWorklist(Fused.getNode());
10584     return Fused;
10585   }
10586 
10587   return SDValue();
10588 }
10589 
/// Simplify an FMUL node: constant folding, canonicalization, strength
/// reduction (x*2 -> x+x, x*-1 -> -x), select/fabs pattern matching, and
/// finally distributive FMA fusion.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // x * 0 == 0 is invalid for NaN/Inf inputs (0 * Inf == NaN) and loses the
  // sign of a negative x, hence the nnan+nsz (or unsafe) requirement.
  if (Options.UnsafeFPMath ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  // NOTE(review): isNegatibleForFree's return value of 2 appears to mark a
  // strictly cheaper negation -- confirm against its definition.
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so that Select is the select and X is the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // The select must be driven by a comparison of X itself against 0.0.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        // Less-than compares are reduced to the greater-than case by swapping
        // the select arms, then handled by the fallthrough below.
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
10741 
/// Simplify an FMA node (fma x, y, z == x*y + z with a single rounding):
/// constant folding, identity folds on the multiplicands, and unsafe-math
/// reassociations with nearby FMUL nodes.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  // Treat either the global flag or a per-node contraction permission as
  // license for the value-changing folds below.
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // fma(0, y, z) -> z and fma(x, 0, z) -> z. Unsafe because 0 * Inf or
  // 0 * NaN would produce NaN, not 0, and signed zeros may be lost.
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // Multiplying by exactly 1.0 is exact, so fma(1, x, y) rounds identically
  // to fadd(x, y).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    // Profitable when the negated constant is materializable: ConstantFP is
    // legal, or K has a single use and is not already a legal FP immediate.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}
10846 
10847 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
10848 // reciprocal.
10849 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
10850 // Notice that this is not always beneficial. One reason is different targets
10851 // may have different costs for FDIV and FMUL, so sometimes the cost of two
10852 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
10853 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
10854 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
10855   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
10856   const SDNodeFlags Flags = N->getFlags();
10857   if (!UnsafeMath && !Flags.hasAllowReciprocal())
10858     return SDValue();
10859 
10860   // Skip if current node is a reciprocal.
10861   SDValue N0 = N->getOperand(0);
10862   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
10863   if (N0CFP && N0CFP->isExactlyValue(1.0))
10864     return SDValue();
10865 
10866   // Exit early if the target does not want this transform or if there can't
10867   // possibly be enough uses of the divisor to make the transform worthwhile.
10868   SDValue N1 = N->getOperand(1);
10869   unsigned MinUses = TLI.combineRepeatedFPDivisors();
10870   if (!MinUses || N1->use_size() < MinUses)
10871     return SDValue();
10872 
10873   // Find all FDIV users of the same divisor.
10874   // Use a set because duplicates may be present in the user list.
10875   SetVector<SDNode *> Users;
10876   for (auto *U : N1->uses()) {
10877     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
10878       // This division is eligible for optimization only if global unsafe math
10879       // is enabled or if this division allows reciprocal formation.
10880       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
10881         Users.insert(U);
10882     }
10883   }
10884 
10885   // Now that we have the actual number of divisor uses, make sure it meets
10886   // the minimum threshold specified by the target.
10887   if (Users.size() < MinUses)
10888     return SDValue();
10889 
10890   EVT VT = N->getValueType(0);
10891   SDLoc DL(N);
10892   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
10893   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
10894 
10895   // Dividend / Divisor -> Dividend * Reciprocal
10896   for (auto *U : Users) {
10897     SDValue Dividend = U->getOperand(0);
10898     if (Dividend != FPOne) {
10899       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
10900                                     Reciprocal, Flags);
10901       CombineTo(U, NewNode);
10902     } else if (U != Reciprocal.getNode()) {
10903       // In the absence of fast-math-flags, this user node is always the
10904       // same node as Reciprocal, but with FMF they may be different nodes.
10905       CombineTo(U, Reciprocal);
10906     }
10907   }
10908   return SDValue(N, 0);  // N was replaced.
10909 }
10910 
/// Combine an FDIV node: constant folding, reciprocal/rsqrt estimate
/// formation (under unsafe math or the node's 'arcp' flag), free double
/// negation, and repeated-divisor reuse.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  // Re-emitting the node with both operands constant lets getNode's constant
  // folder compute the quotient.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // The transforms below trade precision for speed, so they require either
  // the global unsafe-math option or this node's allow-reciprocal flag.
  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpext(sqrt(y)) -> x * fpext(rsqrt(y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpround(sqrt(y)) -> x * fpround(rsqrt(y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // If several FDIVs share this divisor, compute its reciprocal once and
  // turn each division into a multiply.
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
11023 
11024 SDValue DAGCombiner::visitFREM(SDNode *N) {
11025   SDValue N0 = N->getOperand(0);
11026   SDValue N1 = N->getOperand(1);
11027   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11028   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11029   EVT VT = N->getValueType(0);
11030 
11031   // fold (frem c1, c2) -> fmod(c1,c2)
11032   if (N0CFP && N1CFP)
11033     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11034 
11035   if (SDValue NewSel = foldBinOpIntoSelect(N))
11036     return NewSel;
11037 
11038   return SDValue();
11039 }
11040 
11041 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11042   SDNodeFlags Flags = N->getFlags();
11043   if (!DAG.getTarget().Options.UnsafeFPMath &&
11044       !Flags.hasApproximateFuncs())
11045     return SDValue();
11046 
11047   SDValue N0 = N->getOperand(0);
11048   if (TLI.isFsqrtCheap(N0, DAG))
11049     return SDValue();
11050 
11051   // FSQRT nodes have flags that propagate to the created nodes.
11052   return buildSqrtEstimate(N0, Flags);
11053 }
11054 
11055 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11056 /// copysign(x, fp_round(y)) -> copysign(x, y)
11057 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11058   SDValue N1 = N->getOperand(1);
11059   if ((N1.getOpcode() == ISD::FP_EXTEND ||
11060        N1.getOpcode() == ISD::FP_ROUND)) {
11061     // Do not optimize out type conversion of f128 type yet.
11062     // For some targets like x86_64, configuration is changed to keep one f128
11063     // value in one SSE register, but instruction selection cannot handle
11064     // FCOPYSIGN on SSE registers yet.
11065     EVT N1VT = N1->getValueType(0);
11066     EVT N1Op0VT = N1->getOperand(0).getValueType();
11067     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11068   }
11069   return false;
11070 }
11071 
11072 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11073   SDValue N0 = N->getOperand(0);
11074   SDValue N1 = N->getOperand(1);
11075   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11076   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11077   EVT VT = N->getValueType(0);
11078 
11079   if (N0CFP && N1CFP) // Constant fold
11080     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11081 
11082   if (N1CFP) {
11083     const APFloat &V = N1CFP->getValueAPF();
11084     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
11085     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11086     if (!V.isNegative()) {
11087       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11088         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11089     } else {
11090       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11091         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11092                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11093     }
11094   }
11095 
11096   // copysign(fabs(x), y) -> copysign(x, y)
11097   // copysign(fneg(x), y) -> copysign(x, y)
11098   // copysign(copysign(x,z), y) -> copysign(x, y)
11099   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11100       N0.getOpcode() == ISD::FCOPYSIGN)
11101     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11102 
11103   // copysign(x, abs(y)) -> abs(x)
11104   if (N1.getOpcode() == ISD::FABS)
11105     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11106 
11107   // copysign(x, copysign(y,z)) -> copysign(x, z)
11108   if (N1.getOpcode() == ISD::FCOPYSIGN)
11109     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11110 
11111   // copysign(x, fp_extend(y)) -> copysign(x, y)
11112   // copysign(x, fp_round(y)) -> copysign(x, y)
11113   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11114     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11115 
11116   return SDValue();
11117 }
11118 
11119 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11120                                const TargetLowering &TLI) {
11121   // This optimization is guarded by a function attribute because it may produce
11122   // unexpected results. Ie, programs may be relying on the platform-specific
11123   // undefined behavior when the float-to-int conversion overflows.
11124   const Function &F = DAG.getMachineFunction().getFunction();
11125   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11126   if (StrictOverflow.getValueAsString().equals("false"))
11127     return SDValue();
11128 
11129   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11130   // replacing casts with a libcall. We also must be allowed to ignore -0.0
11131   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
11132   // conversions would return +0.0.
11133   // FIXME: We should be able to use node-level FMF here.
11134   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11135   EVT VT = N->getValueType(0);
11136   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11137       !DAG.getTarget().Options.NoSignedZerosFPMath)
11138     return SDValue();
11139 
11140   // fptosi/fptoui round towards zero, so converting from FP to integer and
11141   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11142   SDValue N0 = N->getOperand(0);
11143   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11144       N0.getOperand(0).getValueType() == VT)
11145     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11146 
11147   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11148       N0.getOperand(0).getValueType() == VT)
11149     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11150 
11151   return SDValue();
11152 }
11153 
/// Combine a SINT_TO_FP node: constant folding, conversion to UINT_TO_FP
/// when the sign bit is known zero, folding setcc-fed conversions into
/// SELECT_CC, and eliminating int<->fp round trips via FTRUNC.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // An i1 setcc result is 0 or 1; sign-converted to FP it is 0.0 or -1.0.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    // The zext makes the value 0 or 1, so the FP results are 0.0 / 1.0 here.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // sint_to_fp (fp_to_sint X) --> ftrunc X, when the attribute allows it.
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
11210 
/// Combine a UINT_TO_FP node: the unsigned counterpart of visitSINT_TO_FP.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    // With a zero sign bit the two conversions give the same value.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // An unsigned 0/1 setcc result becomes 0.0 / 1.0 in FP.
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // uint_to_fp (fp_to_uint X) --> ftrunc X, when the attribute allows it.
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
11252 
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Only fires on an int->fp->int round trip.
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // A signed type spends one bit on the sign, so subtract one magnitude bit
  // per signed side.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: only sign-extend when both sides are signed.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same width: the bits pass through unchanged.
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
11294 
11295 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
11296   SDValue N0 = N->getOperand(0);
11297   EVT VT = N->getValueType(0);
11298 
11299   // fold (fp_to_sint c1fp) -> c1
11300   if (isConstantFPBuildVectorOrConstantFP(N0))
11301     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
11302 
11303   return FoldIntToFPToInt(N, DAG);
11304 }
11305 
11306 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
11307   SDValue N0 = N->getOperand(0);
11308   EVT VT = N->getValueType(0);
11309 
11310   // fold (fp_to_uint c1fp) -> c1
11311   if (isConstantFPBuildVectorOrConstantFP(N0))
11312     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
11313 
11314   return FoldIntToFPToInt(N, DAG);
11315 }
11316 
/// Combine an FP_ROUND node. Operand 1 is an IntPtr constant: 1 means the
/// rounding is known to be value-preserving (a "trunc").
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      // The merged round is a trunc only if both original rounds were.
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only affects the magnitude, so the copysign can be hoisted.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
11372 
11373 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
11374   SDValue N0 = N->getOperand(0);
11375   EVT VT = N->getValueType(0);
11376   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11377   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11378 
11379   // fold (fp_round_inreg c1fp) -> c1fp
11380   if (N0CFP && isTypeLegal(EVT)) {
11381     SDLoc DL(N);
11382     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
11383     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
11384   }
11385 
11386   return SDValue();
11387 }
11388 
/// Combine an FP_EXTEND node: constant folding, looking through fp16
/// conversions and value-preserving rounds, and widening a normal FP load
/// into an extending load.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // FP16_TO_FP can produce the wider type directly when it is legal there.
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    // Depending on how the widths compare, the result is X itself, a
    // narrower round of X, or a wider extend of X.
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the old load see a round of the extending load, chained
    // to the new load's output chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
11441 
11442 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
11443   SDValue N0 = N->getOperand(0);
11444   EVT VT = N->getValueType(0);
11445 
11446   // fold (fceil c1) -> fceil(c1)
11447   if (isConstantFPBuildVectorOrConstantFP(N0))
11448     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
11449 
11450   return SDValue();
11451 }
11452 
11453 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
11454   SDValue N0 = N->getOperand(0);
11455   EVT VT = N->getValueType(0);
11456 
11457   // fold (ftrunc c1) -> ftrunc(c1)
11458   if (isConstantFPBuildVectorOrConstantFP(N0))
11459     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
11460 
11461   // fold ftrunc (known rounded int x) -> x
11462   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
11463   // likely to be generated to extract integer from a rounded floating value.
11464   switch (N0.getOpcode()) {
11465   default: break;
11466   case ISD::FRINT:
11467   case ISD::FTRUNC:
11468   case ISD::FNEARBYINT:
11469   case ISD::FFLOOR:
11470   case ISD::FCEIL:
11471     return N0;
11472   }
11473 
11474   return SDValue();
11475 }
11476 
11477 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
11478   SDValue N0 = N->getOperand(0);
11479   EVT VT = N->getValueType(0);
11480 
11481   // fold (ffloor c1) -> ffloor(c1)
11482   if (isConstantFPBuildVectorOrConstantFP(N0))
11483     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
11484 
11485   return SDValue();
11486 }
11487 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an FNEG node: constant folding, free negation of the operand
/// expression, sign-bit flipping through a bitcast, and pushing the
/// negation into an FMUL's constant operand.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand expression can absorb the negation at no cost, do that
  // instead of emitting an explicit FNEG.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      // XOR with the sign mask flips the FP sign bit in integer form.
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only when the negated constant is
      // itself representable as a legal FP immediate/constant.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
11546 
11547 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
11548   SDValue N0 = N->getOperand(0);
11549   SDValue N1 = N->getOperand(1);
11550   EVT VT = N->getValueType(0);
11551   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11552   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11553 
11554   if (N0CFP && N1CFP) {
11555     const APFloat &C0 = N0CFP->getValueAPF();
11556     const APFloat &C1 = N1CFP->getValueAPF();
11557     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
11558   }
11559 
11560   // Canonicalize to constant on RHS.
11561   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11562      !isConstantFPBuildVectorOrConstantFP(N1))
11563     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
11564 
11565   return SDValue();
11566 }
11567 
11568 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
11569   SDValue N0 = N->getOperand(0);
11570   SDValue N1 = N->getOperand(1);
11571   EVT VT = N->getValueType(0);
11572   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11573   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11574 
11575   if (N0CFP && N1CFP) {
11576     const APFloat &C0 = N0CFP->getValueAPF();
11577     const APFloat &C1 = N1CFP->getValueAPF();
11578     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
11579   }
11580 
11581   // Canonicalize to constant on RHS.
11582   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11583      !isConstantFPBuildVectorOrConstantFP(N1))
11584     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
11585 
11586   return SDValue();
11587 }
11588 
11589 SDValue DAGCombiner::visitFABS(SDNode *N) {
11590   SDValue N0 = N->getOperand(0);
11591   EVT VT = N->getValueType(0);
11592 
11593   // fold (fabs c1) -> fabs(c1)
11594   if (isConstantFPBuildVectorOrConstantFP(N0))
11595     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11596 
11597   // fold (fabs (fabs x)) -> (fabs x)
11598   if (N0.getOpcode() == ISD::FABS)
11599     return N->getOperand(0);
11600 
11601   // fold (fabs (fneg x)) -> (fabs x)
11602   // fold (fabs (fcopysign x, y)) -> (fabs x)
11603   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
11604     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
11605 
11606   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
11607   // constant pool values.
11608   if (!TLI.isFAbsFree(VT) &&
11609       N0.getOpcode() == ISD::BITCAST &&
11610       N0.getNode()->hasOneUse()) {
11611     SDValue Int = N0.getOperand(0);
11612     EVT IntVT = Int.getValueType();
11613     if (IntVT.isInteger() && !IntVT.isVector()) {
11614       APInt SignMask;
11615       if (N0.getValueType().isVector()) {
11616         // For a vector, get a mask such as 0x7f... per scalar element
11617         // and splat it.
11618         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
11619         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
11620       } else {
11621         // For a scalar, just generate 0x7f...
11622         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
11623       }
11624       SDLoc DL(N0);
11625       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
11626                         DAG.getConstant(SignMask, DL, IntVT));
11627       AddToWorklist(Int.getNode());
11628       return DAG.getBitcast(N->getValueType(0), Int);
11629     }
11630   }
11631 
11632   return SDValue();
11633 }
11634 
11635 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
11636   SDValue Chain = N->getOperand(0);
11637   SDValue N1 = N->getOperand(1);
11638   SDValue N2 = N->getOperand(2);
11639 
11640   // If N is a constant we could fold this into a fallthrough or unconditional
11641   // branch. However that doesn't happen very often in normal code, because
11642   // Instcombine/SimplifyCFG should have handled the available opportunities.
11643   // If we did this folding here, it would be necessary to update the
11644   // MachineBasicBlock CFG, which is awkward.
11645 
11646   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
11647   // on the target.
11648   if (N1.getOpcode() == ISD::SETCC &&
11649       TLI.isOperationLegalOrCustom(ISD::BR_CC,
11650                                    N1.getOperand(0).getValueType())) {
11651     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11652                        Chain, N1.getOperand(2),
11653                        N1.getOperand(0), N1.getOperand(1), N2);
11654   }
11655 
11656   if (N1.hasOneUse()) {
11657     if (SDValue NewN1 = rebuildSetCC(N1))
11658       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
11659   }
11660 
11661   return SDValue();
11662 }
11663 
11664 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
11665   if (N.getOpcode() == ISD::SRL ||
11666       (N.getOpcode() == ISD::TRUNCATE &&
11667        (N.getOperand(0).hasOneUse() &&
11668         N.getOperand(0).getOpcode() == ISD::SRL))) {
11669     // Look pass the truncate.
11670     if (N.getOpcode() == ISD::TRUNCATE)
11671       N = N.getOperand(0);
11672 
11673     // Match this pattern so that we can generate simpler code:
11674     //
11675     //   %a = ...
11676     //   %b = and i32 %a, 2
11677     //   %c = srl i32 %b, 1
11678     //   brcond i32 %c ...
11679     //
11680     // into
11681     //
11682     //   %a = ...
11683     //   %b = and i32 %a, 2
11684     //   %c = setcc eq %b, 0
11685     //   brcond %c ...
11686     //
11687     // This applies only when the AND constant value has one bit set and the
11688     // SRL constant is equal to the log2 of the AND constant. The back-end is
11689     // smart enough to convert the result into a TEST/JMP sequence.
11690     SDValue Op0 = N.getOperand(0);
11691     SDValue Op1 = N.getOperand(1);
11692 
11693     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
11694       SDValue AndOp1 = Op0.getOperand(1);
11695 
11696       if (AndOp1.getOpcode() == ISD::Constant) {
11697         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
11698 
11699         if (AndConst.isPowerOf2() &&
11700             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
11701           SDLoc DL(N);
11702           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
11703                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
11704                               ISD::SETNE);
11705         }
11706       }
11707     }
11708   }
11709 
11710   // Transform br(xor(x, y)) -> br(x != y)
11711   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
11712   if (N.getOpcode() == ISD::XOR) {
11713     // Because we may call this on a speculatively constructed
11714     // SimplifiedSetCC Node, we need to simplify this node first.
11715     // Ideally this should be folded into SimplifySetCC and not
11716     // here. For now, grab a handle to N so we don't lose it from
11717     // replacements interal to the visit.
11718     HandleSDNode XORHandle(N);
11719     while (N.getOpcode() == ISD::XOR) {
11720       SDValue Tmp = visitXOR(N.getNode());
11721       // No simplification done.
11722       if (!Tmp.getNode())
11723         break;
11724       // Returning N is form in-visit replacement that may invalidated
11725       // N. Grab value from Handle.
11726       if (Tmp.getNode() == N.getNode())
11727         N = XORHandle.getValue();
11728       else // Node simplified. Try simplifying again.
11729         N = Tmp;
11730     }
11731 
11732     if (N.getOpcode() != ISD::XOR)
11733       return N;
11734 
11735     SDNode *TheXor = N.getNode();
11736 
11737     SDValue Op0 = TheXor->getOperand(0);
11738     SDValue Op1 = TheXor->getOperand(1);
11739 
11740     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
11741       bool Equal = false;
11742       if (isOneConstant(Op0) && Op0.hasOneUse() &&
11743           Op0.getOpcode() == ISD::XOR) {
11744         TheXor = Op0.getNode();
11745         Equal = true;
11746       }
11747 
11748       EVT SetCCVT = N.getValueType();
11749       if (LegalTypes)
11750         SetCCVT = getSetCCResultType(SetCCVT);
11751       // Replace the uses of XOR with SETCC
11752       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
11753                           Equal ? ISD::SETEQ : ISD::SETNE);
11754     }
11755   }
11756 
11757   return SDValue();
11758 }
11759 
11760 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
11761 //
11762 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
11763   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
11764   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
11765 
11766   // If N is a constant we could fold this into a fallthrough or unconditional
11767   // branch. However that doesn't happen very often in normal code, because
11768   // Instcombine/SimplifyCFG should have handled the available opportunities.
11769   // If we did this folding here, it would be necessary to update the
11770   // MachineBasicBlock CFG, which is awkward.
11771 
11772   // Use SimplifySetCC to simplify SETCC's.
11773   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
11774                                CondLHS, CondRHS, CC->get(), SDLoc(N),
11775                                false);
11776   if (Simp.getNode()) AddToWorklist(Simp.getNode());
11777 
11778   // fold to a simpler setcc
11779   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
11780     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
11781                        N->getOperand(0), Simp.getOperand(2),
11782                        Simp.getOperand(0), Simp.getOperand(1),
11783                        N->getOperand(4));
11784 
11785   return SDValue();
11786 }
11787 
11788 /// Return true if 'Use' is a load or a store that uses N as its base pointer
11789 /// and that N may be folded in the load / store addressing mode.
11790 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
11791                                     SelectionDAG &DAG,
11792                                     const TargetLowering &TLI) {
11793   EVT VT;
11794   unsigned AS;
11795 
11796   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
11797     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
11798       return false;
11799     VT = LD->getMemoryVT();
11800     AS = LD->getAddressSpace();
11801   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
11802     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
11803       return false;
11804     VT = ST->getMemoryVT();
11805     AS = ST->getAddressSpace();
11806   } else
11807     return false;
11808 
11809   TargetLowering::AddrMode AM;
11810   if (N->getOpcode() == ISD::ADD) {
11811     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11812     if (Offset)
11813       // [reg +/- imm]
11814       AM.BaseOffs = Offset->getSExtValue();
11815     else
11816       // [reg +/- reg]
11817       AM.Scale = 1;
11818   } else if (N->getOpcode() == ISD::SUB) {
11819     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
11820     if (Offset)
11821       // [reg +/- imm]
11822       AM.BaseOffs = -Offset->getSExtValue();
11823     else
11824       // [reg +/- reg]
11825       AM.Scale = 1;
11826   } else
11827     return false;
11828 
11829   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
11830                                    VT.getTypeForEVT(*DAG.getContext()), AS);
11831 }
11832 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // This transformation only runs once the DAG has been fully legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Accept only unindexed loads/stores whose memory VT has a legal
  // pre-inc or pre-dec indexed form on this target.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Rewriting a user that feeds back into N would create a cycle.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Only constant-offset add/sub users of the base pointer can be
      // rewritten; any other kind of user disables the rewrite entirely.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The other operand of the add/sub (BasePtr occupies one slot).
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the original base/offset order before building the indexed node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    // Indexed load results are (value, new base, chain): value 0 stays
    // value 0, while the old chain (value 1) becomes result value 2.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    // Indexed store results are (new base, chain); the chain is value 1.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Swap back so BasePtr/Offset again match what the target's
  // getPreIndexedAddressParts returned, for the rewrite math below.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs: SUB negates whichever operand is subtracted; PRE_DEC negates
    // the indexed node's offset (or base, if they were swapped above).
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
12058 
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // This transformation only runs once the DAG has been fully legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Accept only unindexed loads/stores whose memory VT has a legal
  // post-inc or post-dec indexed form on this target.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The base pointer needs at least one other use: the add/sub candidate.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan the base pointer's users for an add/sub that the target can fold
  // into a post-indexed addressing mode.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          // Indexed load results are (value, new base, chain); the old
          // chain (value 1) maps to result value 2.
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          // Indexed store results are (new base, chain).
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
12176 
12177 /// Return the base-pointer arithmetic from an indexed \p LD.
12178 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12179   ISD::MemIndexedMode AM = LD->getAddressingMode();
12180   assert(AM != ISD::UNINDEXED);
12181   SDValue BP = LD->getOperand(1);
12182   SDValue Inc = LD->getOperand(2);
12183 
12184   // Some backends use TargetConstants for load offsets, but don't expect
12185   // TargetConstants in general ADD nodes. We can convert these constants into
12186   // regular Constants (if the constant is not opaque).
12187   assert((Inc.getOpcode() != ISD::TargetConstant ||
12188           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12189          "Cannot split out indexing using opaque target constants");
12190   if (Inc.getOpcode() == ISD::TargetConstant) {
12191     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12192     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12193                           ConstInc->getValueType(0));
12194   }
12195 
12196   unsigned Opc =
12197       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12198   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12199 }
12200 
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          // The incremented pointer result is still live: materialize it as
          // a standalone add/sub so the load itself can be removed.
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Forward only when the immediately preceding store on the chain is to
      // the same address and stores a value of exactly the loaded type.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
12333 
12334 namespace {
12335 
12336 /// Helper structure used to slice a load in smaller loads.
12337 /// Basically a slice is obtained from the following sequence:
12338 /// Origin = load Ty1, Base
12339 /// Shift = srl Ty1 Origin, CstTy Amount
12340 /// Inst = trunc Shift to Ty2
12341 ///
12342 /// Then, it will be rewritten into:
12343 /// Slice = load SliceTy, Base + SliceOffset
12344 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
12345 ///
12346 /// SliceTy is deduced from the number of bits that are actually used to
12347 /// build Inst.
12348 struct LoadedSlice {
12349   /// Helper structure used to compute the cost of a slice.
12350   struct Cost {
12351     /// Are we optimizing for code size.
12352     bool ForCodeSize;
12353 
12354     /// Various cost.
12355     unsigned Loads = 0;
12356     unsigned Truncates = 0;
12357     unsigned CrossRegisterBanksCopies = 0;
12358     unsigned ZExts = 0;
12359     unsigned Shift = 0;
12360 
12361     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
12362 
12363     /// Get the cost of one isolated slice.
12364     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
12365         : ForCodeSize(ForCodeSize), Loads(1) {
12366       EVT TruncType = LS.Inst->getValueType(0);
12367       EVT LoadedType = LS.getLoadedType();
12368       if (TruncType != LoadedType &&
12369           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
12370         ZExts = 1;
12371     }
12372 
12373     /// Account for slicing gain in the current cost.
12374     /// Slicing provide a few gains like removing a shift or a
12375     /// truncate. This method allows to grow the cost of the original
12376     /// load with the gain from this slice.
12377     void addSliceGain(const LoadedSlice &LS) {
12378       // Each slice saves a truncate.
12379       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
12380       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
12381                               LS.Inst->getValueType(0)))
12382         ++Truncates;
12383       // If there is a shift amount, this slice gets rid of it.
12384       if (LS.Shift)
12385         ++Shift;
12386       // If this slice can merge a cross register bank copy, account for it.
12387       if (LS.canMergeExpensiveCrossRegisterBankCopy())
12388         ++CrossRegisterBanksCopies;
12389     }
12390 
12391     Cost &operator+=(const Cost &RHS) {
12392       Loads += RHS.Loads;
12393       Truncates += RHS.Truncates;
12394       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
12395       ZExts += RHS.ZExts;
12396       Shift += RHS.Shift;
12397       return *this;
12398     }
12399 
12400     bool operator==(const Cost &RHS) const {
12401       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
12402              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
12403              ZExts == RHS.ZExts && Shift == RHS.Shift;
12404     }
12405 
12406     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
12407 
12408     bool operator<(const Cost &RHS) const {
12409       // Assume cross register banks copies are as expensive as loads.
12410       // FIXME: Do we want some more target hooks?
12411       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
12412       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
12413       // Unless we are optimizing for code size, consider the
12414       // expensive operation first.
12415       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
12416         return ExpensiveOpsLHS < ExpensiveOpsRHS;
12417       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
12418              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
12419     }
12420 
    // The remaining orderings are all derived from operator<.
    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
12426   };
12427 
  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  /// Build a slice: \p Inst is the truncate producing the sliced value,
  /// \p Origin the load it comes from, and \p Shift the right-shift
  /// amount (in bits) applied to the loaded value before truncation.
  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
12445 
12446   /// Get the bits used in a chunk of bits \p BitWidth large.
12447   /// \return Result is \p BitWidth and has used bits set to 1 and
12448   ///         not used bits set to 0.
12449   APInt getUsedBits() const {
12450     // Reproduce the trunc(lshr) sequence:
12451     // - Start from the truncated value.
12452     // - Zero extend to the desired bit width.
12453     // - Shift left.
12454     assert(Origin && "No original load to compare against.");
12455     unsigned BitWidth = Origin->getValueSizeInBits(0);
12456     assert(Inst && "This slice is not bound to an instruction");
12457     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
12458            "Extracted slice is bigger than the whole type!");
12459     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
12460     UsedBits.setAllBits();
12461     UsedBits = UsedBits.zext(BitWidth);
12462     UsedBits <<= Shift;
12463     return UsedBits;
12464   }
12465 
12466   /// Get the size of the slice to be loaded in bytes.
12467   unsigned getLoadedSize() const {
12468     unsigned SliceSize = getUsedBits().countPopulation();
12469     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
12470     return SliceSize / 8;
12471   }
12472 
12473   /// Get the type that will be loaded for this slice.
12474   /// Note: This may not be the final type for the slice.
12475   EVT getLoadedType() const {
12476     assert(DAG && "Missing context");
12477     LLVMContext &Ctxt = *DAG->getContext();
12478     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
12479   }
12480 
12481   /// Get the alignment of the load used for this slice.
12482   unsigned getAlignment() const {
12483     unsigned Alignment = Origin->getAlignment();
12484     unsigned Offset = getOffsetFromBase();
12485     if (Offset != 0)
12486       Alignment = MinAlign(Alignment, Alignment + Offset);
12487     return Alignment;
12488   }
12489 
12490   /// Check if this slice can be rewritten with legal operations.
12491   bool isLegal() const {
12492     // An invalid slice is not legal.
12493     if (!Origin || !Inst || !DAG)
12494       return false;
12495 
12496     // Offsets are for indexed load only, we do not handle that.
12497     if (!Origin->getOffset().isUndef())
12498       return false;
12499 
12500     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12501 
12502     // Check that the type is legal.
12503     EVT SliceType = getLoadedType();
12504     if (!TLI.isTypeLegal(SliceType))
12505       return false;
12506 
12507     // Check that the load is legal for this type.
12508     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
12509       return false;
12510 
12511     // Check that the offset can be computed.
12512     // 1. Check its type.
12513     EVT PtrType = Origin->getBasePtr().getValueType();
12514     if (PtrType == MVT::Untyped || PtrType.isExtended())
12515       return false;
12516 
12517     // 2. Check that it fits in the immediate.
12518     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
12519       return false;
12520 
12521     // 3. Check that the computation is legal.
12522     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
12523       return false;
12524 
12525     // Check that the zext is legal if it needs one.
12526     EVT TruncateType = Inst->getValueType(0);
12527     if (TruncateType != SliceType &&
12528         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
12529       return false;
12530 
12531     return true;
12532   }
12533 
12534   /// Get the offset in bytes of this slice in the original chunk of
12535   /// bits.
12536   /// \pre DAG != nullptr.
12537   uint64_t getOffsetFromBase() const {
12538     assert(DAG && "Missing context.");
12539     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
12540     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
12541     uint64_t Offset = Shift / 8;
12542     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
12543     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
12544            "The size of the original loaded type is not a multiple of a"
12545            " byte.");
12546     // If Offset is bigger than TySizeInBytes, it means we are loading all
12547     // zeros. This should have been optimized before in the process.
12548     assert(TySizeInBytes > Offset &&
12549            "Invalid shift amount for given loaded size");
12550     if (IsBigEndian)
12551       Offset = TySizeInBytes - Offset - getLoadedSize();
12552     return Offset;
12553   }
12554 
12555   /// Generate the sequence of instructions to load the slice
12556   /// represented by this object and redirect the uses of this slice to
12557   /// this new sequence of instructions.
12558   /// \pre this->Inst && this->Origin are valid Instructions and this
12559   /// object passed the legal check: LoadedSlice::isLegal returned true.
12560   /// \return The last instruction of the sequence used to load the slice.
12561   SDValue loadSlice() const {
12562     assert(Inst && Origin && "Unable to replace a non-existing slice.");
12563     const SDValue &OldBaseAddr = Origin->getBasePtr();
12564     SDValue BaseAddr = OldBaseAddr;
12565     // Get the offset in that chunk of bytes w.r.t. the endianness.
12566     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
12567     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
12568     if (Offset) {
12569       // BaseAddr = BaseAddr + Offset.
12570       EVT ArithType = BaseAddr.getValueType();
12571       SDLoc DL(Origin);
12572       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
12573                               DAG->getConstant(Offset, DL, ArithType));
12574     }
12575 
12576     // Create the type of the loaded slice according to its size.
12577     EVT SliceType = getLoadedType();
12578 
12579     // Create the load for the slice.
12580     SDValue LastInst =
12581         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
12582                      Origin->getPointerInfo().getWithOffset(Offset),
12583                      getAlignment(), Origin->getMemOperand()->getFlags());
12584     // If the final type is not the same as the loaded type, this means that
12585     // we have to pad with zero. Create a zero extend for that.
12586     EVT FinalType = Inst->getValueType(0);
12587     if (SliceType != FinalType)
12588       LastInst =
12589           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
12590     return LastInst;
12591   }
12592 
12593   /// Check if this slice can be merged with an expensive cross register
12594   /// bank copy. E.g.,
12595   /// i = load i32
12596   /// f = bitcast i32 i to float
12597   bool canMergeExpensiveCrossRegisterBankCopy() const {
12598     if (!Inst || !Inst->hasOneUse())
12599       return false;
12600     SDNode *Use = *Inst->use_begin();
12601     if (Use->getOpcode() != ISD::BITCAST)
12602       return false;
12603     assert(DAG && "Missing context");
12604     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12605     EVT ResVT = Use->getValueType(0);
12606     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
12607     const TargetRegisterClass *ArgRC =
12608         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
12609     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
12610       return false;
12611 
12612     // At this point, we know that we perform a cross-register-bank copy.
12613     // Check if it is expensive.
12614     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
12615     // Assume bitcasts are cheap, unless both register classes do not
12616     // explicitly share a common sub class.
12617     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
12618       return false;
12619 
12620     // Check if it will be merged with the load.
12621     // 1. Check the alignment constraint.
12622     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
12623         ResVT.getTypeForEVT(*DAG->getContext()));
12624 
12625     if (RequiredAlignment > getAlignment())
12626       return false;
12627 
12628     // 2. Check that the load is a legal operation for that type.
12629     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
12630       return false;
12631 
12632     // 3. Check that we do not have a zext in the way.
12633     if (Inst->getValueType(0) != getLoadedType())
12634       return false;
12635 
12636     return true;
12637   }
12638 };
12639 
12640 } // end anonymous namespace
12641 
12642 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
12643 /// \p UsedBits looks like 0..0 1..1 0..0.
12644 static bool areUsedBitsDense(const APInt &UsedBits) {
12645   // If all the bits are one, this is dense!
12646   if (UsedBits.isAllOnesValue())
12647     return true;
12648 
12649   // Get rid of the unused bits on the right.
12650   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
12651   // Get rid of the unused bits on the left.
12652   if (NarrowedUsedBits.countLeadingZeros())
12653     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
12654   // Check that the chunk of bits is completely used.
12655   return NarrowedUsedBits.isAllOnesValue();
12656 }
12657 
12658 /// Check whether or not \p First and \p Second are next to each other
12659 /// in memory. This means that there is no hole between the bits loaded
12660 /// by \p First and the bits loaded by \p Second.
12661 static bool areSlicesNextToEachOther(const LoadedSlice &First,
12662                                      const LoadedSlice &Second) {
12663   assert(First.Origin == Second.Origin && First.Origin &&
12664          "Unable to match different memory origins.");
12665   APInt UsedBits = First.getUsedBits();
12666   assert((UsedBits & Second.getUsedBits()) == 0 &&
12667          "Slices are not supposed to overlap.");
12668   UsedBits |= Second.getUsedBits();
12669   return areUsedBitsDense(UsedBits);
12670 }
12671 
/// Adjust the \p GlobalLSCost according to the target
/// paring capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // NOTE: the increment expression runs even on iterations that
  // 'continue', so a slice that failed to pair still becomes the First
  // candidate of the next iteration (unless Second was reset to null).
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                                                           First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A paired couple of loads costs one load instead of two.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
12732 
12733 /// Check the profitability of all involved LoadedSlice.
12734 /// Currently, it is considered profitable if there is exactly two
12735 /// involved slices (1) which are (2) next to each other in memory, and
12736 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
12737 ///
12738 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
12739 /// the elements themselves.
12740 ///
12741 /// FIXME: When the cost model will be mature enough, we can relax
12742 /// constraints (1) and (2).
12743 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
12744                                 const APInt &UsedBits, bool ForCodeSize) {
12745   unsigned NumberOfSlices = LoadedSlices.size();
12746   if (StressLoadSlicing)
12747     return NumberOfSlices > 1;
12748 
12749   // Check (1).
12750   if (NumberOfSlices != 2)
12751     return false;
12752 
12753   // Check (2).
12754   if (!areUsedBitsDense(UsedBits))
12755     return false;
12756 
12757   // Check (3).
12758   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
12759   // The original code has one big load.
12760   OrigCost.Loads = 1;
12761   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
12762     const LoadedSlice &LS = LoadedSlices[CurrSlice];
12763     // Accumulate the cost of all the slices.
12764     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
12765     GlobalSlicingCost += SliceCost;
12766 
12767     // Account as cost in the original configuration the gain obtained
12768     // with the current slices.
12769     OrigCost.addSliceGain(LS);
12770   }
12771 
12772   // If the target supports paired load, adjust the cost accordingly.
12773   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
12774   return OrigCost > GlobalSlicingCost;
12775 }
12776 
/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Only slice once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Only plain (non-volatile, non-extending, non-indexed) integer loads
  // are candidates.
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a Truncate, iff we encountered, trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    // Redirect the users of this slice to the freshly created sequence.
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may have wrapped the load in a zext; dig down to the
    // load itself to reach its chain result.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie the chains of all the new loads together and replace the chain
  // uses of the original load with the token factor.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
12878 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // (0, 0) means "no match".
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Look for the load among the token factor's operands.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Number of bytes covered by the run of zero mask bits, i.e. the bytes
  // being cleared out by the AND.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // Success: report the number of masked bytes and their byte offset.
  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
12952 
12953 /// Check to see if IVal is something that provides a value as specified by
12954 /// MaskInfo. If so, replace the specified store with a narrower store of
12955 /// truncated IVal.
12956 static SDNode *
12957 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
12958                                 SDValue IVal, StoreSDNode *St,
12959                                 DAGCombiner *DC) {
12960   unsigned NumBytes = MaskInfo.first;
12961   unsigned ByteShift = MaskInfo.second;
12962   SelectionDAG &DAG = DC->getDAG();
12963 
12964   // Check to see if IVal is all zeros in the part being masked in by the 'or'
12965   // that uses this.  If not, this is not a replacement.
12966   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
12967                                   ByteShift*8, (ByteShift+NumBytes)*8);
12968   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
12969 
12970   // Check that it is legal on the target to do this.  It is legal if the new
12971   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
12972   // legalization.
12973   MVT VT = MVT::getIntegerVT(NumBytes*8);
12974   if (!DC->isTypeLegal(VT))
12975     return nullptr;
12976 
12977   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
12978   // shifted by ByteShift and truncated down to NumBytes.
12979   if (ByteShift) {
12980     SDLoc DL(IVal);
12981     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
12982                        DAG.getConstant(ByteShift*8, DL,
12983                                     DC->getShiftAmountTy(IVal.getValueType())));
12984   }
12985 
12986   // Figure out the offset for the store and the alignment of the access.
12987   unsigned StOffset;
12988   unsigned NewAlign = St->getAlignment();
12989 
12990   if (DAG.getDataLayout().isLittleEndian())
12991     StOffset = ByteShift;
12992   else
12993     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
12994 
12995   SDValue Ptr = St->getBasePtr();
12996   if (StOffset) {
12997     SDLoc DL(IVal);
12998     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
12999                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
13000     NewAlign = MinAlign(NewAlign, StOffset);
13001   }
13002 
13003   // Truncate down to the new size.
13004   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
13005 
13006   ++OpsNarrowed;
13007   return DAG
13008       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
13009                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
13010       .getNode();
13011 }
13012 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  // Volatile stores cannot be narrowed.
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain, full-width, scalar stores of a single-use value qualify.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // Otherwise, only or/xor/and with a constant right-hand side qualify
  // for the generic narrowing below.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The value being stored must come from a single-use load of the same
  // pointer, with the store chained directly to that load.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the constant so that in all three cases the set
    // bits of Imm are the bits affected by the operation.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // Degenerate masks: no bits (or all bits) affected, nothing to do.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // Narrow to the smallest power-of-2 window covering the affected bits.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed when every affected bit lies inside the narrow window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the AND inversion performed above.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      // The narrow access must still be sufficiently aligned for NewVT.
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store sequence.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // The chain output of the old load is now provided by the new load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
13138 
13139 /// For a given floating point load / store pair, if the load value isn't used
13140 /// by any other operations, then consider transforming the pair to integer
13141 /// load / store operations if the target deems the transformation profitable.
13142 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
13143   StoreSDNode *ST  = cast<StoreSDNode>(N);
13144   SDValue Chain = ST->getChain();
13145   SDValue Value = ST->getValue();
13146   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
13147       Value.hasOneUse() &&
13148       Chain == SDValue(Value.getNode(), 1)) {
13149     LoadSDNode *LD = cast<LoadSDNode>(Value);
13150     EVT VT = LD->getMemoryVT();
13151     if (!VT.isFloatingPoint() ||
13152         VT != ST->getMemoryVT() ||
13153         LD->isNonTemporal() ||
13154         ST->isNonTemporal() ||
13155         LD->getPointerInfo().getAddrSpace() != 0 ||
13156         ST->getPointerInfo().getAddrSpace() != 0)
13157       return SDValue();
13158 
13159     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
13160     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
13161         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
13162         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
13163         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
13164       return SDValue();
13165 
13166     unsigned LDAlign = LD->getAlignment();
13167     unsigned STAlign = ST->getAlignment();
13168     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
13169     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
13170     if (LDAlign < ABIAlign || STAlign < ABIAlign)
13171       return SDValue();
13172 
13173     SDValue NewLD =
13174         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
13175                     LD->getPointerInfo(), LDAlign);
13176 
13177     SDValue NewST =
13178         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
13179                      ST->getPointerInfo(), STAlign);
13180 
13181     AddToWorklist(NewLD.getNode());
13182     AddToWorklist(NewST.getNode());
13183     WorklistRemover DeadNodes(*this);
13184     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
13185     ++LdStFP2Int;
13186     return NewST;
13187   }
13188 
13189   return SDValue();
13190 }
13191 
13192 // This is a helper function for visitMUL to check the profitability
13193 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
13194 // MulNode is the original multiply, AddNode is (add x, c1),
13195 // and ConstNode is c2.
13196 //
13197 // If the (add x, c1) has multiple uses, we could increase
13198 // the number of adds if we make this transformation.
13199 // It would only be worth doing this if we can remove a
13200 // multiply in the process. Check for that here.
13201 // To illustrate:
13202 //     (A + c1) * c3
13203 //     (A + c2) * c3
13204 // We're checking for cases where we have common "c3 * A" expressions.
13205 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
13206                                               SDValue &AddNode,
13207                                               SDValue &ConstNode) {
13208   APInt Val;
13209 
13210   // If the add only has one use, this would be OK to do.
13211   if (AddNode.getNode()->hasOneUse())
13212     return true;
13213 
13214   // Walk all the users of the constant with which we're multiplying.
13215   for (SDNode *Use : ConstNode->uses()) {
13216     if (Use == MulNode) // This use is the one we're on right now. Skip it.
13217       continue;
13218 
13219     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
13220       SDNode *OtherOp;
13221       SDNode *MulVar = AddNode.getOperand(0).getNode();
13222 
13223       // OtherOp is what we're multiplying against the constant.
13224       if (Use->getOperand(0) == ConstNode)
13225         OtherOp = Use->getOperand(1).getNode();
13226       else
13227         OtherOp = Use->getOperand(0).getNode();
13228 
13229       // Check to see if multiply is with the same operand of our "add".
13230       //
13231       //     ConstNode  = CONST
13232       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
13233       //     ...
13234       //     AddNode  = (A + c1)  <-- MulVar is A.
13235       //         = AddNode * ConstNode   <-- current visiting instruction.
13236       //
13237       // If we make this transformation, we will have a common
13238       // multiply (ConstNode * A) that we can save.
13239       if (OtherOp == MulVar)
13240         return true;
13241 
13242       // Now check to see if a future expansion will give us a common
13243       // multiply.
13244       //
13245       //     ConstNode  = CONST
13246       //     AddNode    = (A + c1)
13247       //     ...   = AddNode * ConstNode <-- current visiting instruction.
13248       //     ...
13249       //     OtherOp = (A + c2)
13250       //     Use     = OtherOp * ConstNode <-- visiting Use.
13251       //
13252       // If we make this transformation, we will have a common
13253       // multiply (CONST * A) after we also do the same transformation
13254       // to the "t2" instruction.
13255       if (OtherOp->getOpcode() == ISD::ADD &&
13256           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
13257           OtherOp->getOperand(0).getNode() == MulVar)
13258         return true;
13259     }
13260   }
13261 
13262   // Didn't find a case where this would be profitable.
13263   return false;
13264 }
13265 
13266 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
13267                                          unsigned NumStores) {
13268   SmallVector<SDValue, 8> Chains;
13269   SmallPtrSet<const SDNode *, 8> Visited;
13270   SDLoc StoreDL(StoreNodes[0].MemNode);
13271 
13272   for (unsigned i = 0; i < NumStores; ++i) {
13273     Visited.insert(StoreNodes[i].MemNode);
13274   }
13275 
13276   // don't include nodes that are children
13277   for (unsigned i = 0; i < NumStores; ++i) {
13278     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
13279       Chains.push_back(StoreNodes[i].MemNode->getChain());
13280   }
13281 
13282   assert(Chains.size() > 0 && "Chain should have generated a chain");
13283   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
13284 }
13285 
/// Merge NumStores consecutive stores (described by StoreNodes, sorted by
/// address) into one wide store and replace the originals with it.
/// \param MemVT        memory type of each individual store.
/// \param IsConstantSrc the stored values are constants (int or FP).
/// \param UseVector    emit the merged value as a BUILD_VECTOR/CONCAT_VECTORS
///                     rather than a single wide integer.
/// \param UseTrunc     the wide integer type is illegal, so emit a truncating
///                     store from the promoted type.
/// \returns true if the merge was performed.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // Compute the merged value type: a wider vector of the element scalar type,
  // or a single integer covering all the stored bits.
  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the constant operands into a BUILD_VECTOR/CONCAT_VECTORS,
      // coercing each constant to MemVT where needed.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcast(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Bitcast so the element carries the expected in-vector type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcast(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          SDValue Vec = Val.getOperand(0);
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Vec.getValueType()) {
            unsigned Elts = Vec.getValueType().getSizeInBits() /
                            MemVTScalarTy.getSizeInBits();
            EVT NewVecTy =
                EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
            Vec = DAG.getBitcast(NewVecTy, Vec);
          }
          // Re-extract from the (possibly bitcast) source using the opcode
          // matching MemVT's shape, reusing the original extract index.
          auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
                                        : ISD::EXTRACT_VECTOR_ELT;
          Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian, the lowest-addressed element occupies the least
      // significant bits, so pack the elements in reverse order.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcast(Val);
      // Shift previous elements up and OR the next element into the low bits.
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Promote the constant to the legalized type and store it back down to
    // the original width with a truncating store.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
13444 
/// Collect into StoreNodes all stores that are candidates for merging with
/// St: stores of the same kind of value (load / constant / extracted vector
/// element) whose address differs from St's only by a constant offset.
/// RootNode is set to the chain ancestor from which candidates are found;
/// callers use it later for dependency checking.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcast(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  // Classify the stored value; candidates must match St's class.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile.
    if (Ld->isVolatile() || Ld->isIndexed())
      return;
  }
  // Returns true (and fills Ptr/Offset) when Other may be merged with St:
  // same value class, compatible type, and an address that shares St's
  // base with a constant byte offset.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcast(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile.
        if (OtherLd->isVolatile() || OtherLd->isIndexed())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1   Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // St is chained on a load: climb one level to the load's chain, then
    // look down through sibling loads for stores chained on them.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise look for stores chained directly on the root.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
13570 
13571 // We need to check that merging these stores does not cause a loop in
13572 // the DAG. Any store candidate may depend on another candidate
13573 // indirectly through its operand (we already consider dependencies
13574 // through the chain). Check in parallel by searching up from
13575 // non-chain operands of candidates.
13576 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13577     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
13578     SDNode *RootNode) {
13579   // FIXME: We should be able to truncate a full search of
13580   // predecessors by doing a BFS and keeping tabs the originating
13581   // stores from which worklist nodes come from in a similar way to
13582   // TokenFactor simplfication.
13583 
13584   SmallPtrSet<const SDNode *, 32> Visited;
13585   SmallVector<const SDNode *, 8> Worklist;
13586 
13587   // RootNode is a predecessor to all candidates so we need not search
13588   // past it. Add RootNode (peeking through TokenFactors). Do not count
13589   // these towards size check.
13590 
13591   Worklist.push_back(RootNode);
13592   while (!Worklist.empty()) {
13593     auto N = Worklist.pop_back_val();
13594     if (N->getOpcode() == ISD::TokenFactor) {
13595       for (SDValue Op : N->ops())
13596         Worklist.push_back(Op.getNode());
13597     }
13598     Visited.insert(N);
13599   }
13600 
13601   // Don't count pruning nodes towards max.
13602   unsigned int Max = 1024 + Visited.size();
13603   // Search Ops of store candidates.
13604   for (unsigned i = 0; i < NumStores; ++i) {
13605     SDNode *N = StoreNodes[i].MemNode;
13606     // Of the 4 Store Operands:
13607     //   * Chain (Op 0) -> We have already considered these
13608     //                    in candidate selection and can be
13609     //                    safely ignored
13610     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
13611     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant
13612     //                      and so no cycles are possible.
13613     //   * (Op 3) -> appears to always be undef. Cannot be source of cycle.
13614     //
13615     // Thus we need only check predecessors of the value operands.
13616     auto *Op = N->getOperand(1).getNode();
13617     if (Visited.insert(Op).second)
13618       Worklist.push_back(Op);
13619   }
13620   // Search through DAG. We can stop early if we find a store node.
13621   for (unsigned i = 0; i < NumStores; ++i)
13622     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
13623                                      Max))
13624       return false;
13625   return true;
13626 }
13627 
13628 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
13629   if (OptLevel == CodeGenOpt::None)
13630     return false;
13631 
13632   EVT MemVT = St->getMemoryVT();
13633   int64_t ElementSizeBytes = MemVT.getStoreSize();
13634   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
13635 
13636   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
13637     return false;
13638 
13639   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
13640       Attribute::NoImplicitFloat);
13641 
13642   // This function cannot currently deal with non-byte-sized memory sizes.
13643   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
13644     return false;
13645 
13646   if (!MemVT.isSimple())
13647     return false;
13648 
13649   // Perform an early exit check. Do not bother looking at stored values that
13650   // are not constants, loads, or extracted vector elements.
13651   SDValue StoredVal = peekThroughBitcast(St->getValue());
13652   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
13653   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
13654                        isa<ConstantFPSDNode>(StoredVal);
13655   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13656                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
13657 
13658   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
13659     return false;
13660 
13661   SmallVector<MemOpLink, 8> StoreNodes;
13662   SDNode *RootNode;
13663   // Find potential store merge candidates by searching through chain sub-DAG
13664   getStoreMergeCandidates(St, StoreNodes, RootNode);
13665 
13666   // Check if there is anything to merge.
13667   if (StoreNodes.size() < 2)
13668     return false;
13669 
13670   // Sort the memory operands according to their distance from the
13671   // base pointer.
13672   llvm::sort(StoreNodes.begin(), StoreNodes.end(),
13673              [](MemOpLink LHS, MemOpLink RHS) {
13674                return LHS.OffsetFromBase < RHS.OffsetFromBase;
13675              });
13676 
13677   // Store Merge attempts to merge the lowest stores. This generally
13678   // works out as if successful, as the remaining stores are checked
13679   // after the first collection of stores is merged. However, in the
13680   // case that a non-mergeable store is found first, e.g., {p[-2],
13681   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
13682   // mergeable cases. To prevent this, we prune such stores from the
13683   // front of StoreNodes here.
13684 
13685   bool RV = false;
13686   while (StoreNodes.size() > 1) {
13687     unsigned StartIdx = 0;
13688     while ((StartIdx + 1 < StoreNodes.size()) &&
13689            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
13690                StoreNodes[StartIdx + 1].OffsetFromBase)
13691       ++StartIdx;
13692 
13693     // Bail if we don't have enough candidates to merge.
13694     if (StartIdx + 1 >= StoreNodes.size())
13695       return RV;
13696 
13697     if (StartIdx)
13698       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
13699 
13700     // Scan the memory operations on the chain and find the first
13701     // non-consecutive store memory address.
13702     unsigned NumConsecutiveStores = 1;
13703     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
13704     // Check that the addresses are consecutive starting from the second
13705     // element in the list of stores.
13706     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
13707       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
13708       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13709         break;
13710       NumConsecutiveStores = i + 1;
13711     }
13712 
13713     if (NumConsecutiveStores < 2) {
13714       StoreNodes.erase(StoreNodes.begin(),
13715                        StoreNodes.begin() + NumConsecutiveStores);
13716       continue;
13717     }
13718 
13719     // The node with the lowest store address.
13720     LLVMContext &Context = *DAG.getContext();
13721     const DataLayout &DL = DAG.getDataLayout();
13722 
13723     // Store the constants into memory as one consecutive store.
13724     if (IsConstantSrc) {
13725       while (NumConsecutiveStores >= 2) {
13726         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13727         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13728         unsigned FirstStoreAlign = FirstInChain->getAlignment();
13729         unsigned LastLegalType = 1;
13730         unsigned LastLegalVectorType = 1;
13731         bool LastIntegerTrunc = false;
13732         bool NonZero = false;
13733         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
13734         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13735           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
13736           SDValue StoredVal = ST->getValue();
13737           bool IsElementZero = false;
13738           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
13739             IsElementZero = C->isNullValue();
13740           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
13741             IsElementZero = C->getConstantFPValue()->isNullValue();
13742           if (IsElementZero) {
13743             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
13744               FirstZeroAfterNonZero = i;
13745           }
13746           NonZero |= !IsElementZero;
13747 
13748           // Find a legal type for the constant store.
13749           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
13750           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
13751           bool IsFast = false;
13752 
13753           // Break early when size is too large to be legal.
13754           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
13755             break;
13756 
13757           if (TLI.isTypeLegal(StoreTy) &&
13758               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13759               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13760                                      FirstStoreAlign, &IsFast) &&
13761               IsFast) {
13762             LastIntegerTrunc = false;
13763             LastLegalType = i + 1;
13764             // Or check whether a truncstore is legal.
13765           } else if (TLI.getTypeAction(Context, StoreTy) ==
13766                      TargetLowering::TypePromoteInteger) {
13767             EVT LegalizedStoredValTy =
13768                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
13769             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
13770                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
13771                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13772                                        FirstStoreAlign, &IsFast) &&
13773                 IsFast) {
13774               LastIntegerTrunc = true;
13775               LastLegalType = i + 1;
13776             }
13777           }
13778 
13779           // We only use vectors if the constant is known to be zero or the
13780           // target allows it and the function is not marked with the
13781           // noimplicitfloat attribute.
13782           if ((!NonZero ||
13783                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
13784               !NoVectors) {
13785             // Find a legal type for the vector store.
13786             unsigned Elts = (i + 1) * NumMemElts;
13787             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13788             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
13789                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13790                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13791                                        FirstStoreAlign, &IsFast) &&
13792                 IsFast)
13793               LastLegalVectorType = i + 1;
13794           }
13795         }
13796 
13797         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
13798         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
13799 
13800         // Check if we found a legal integer type that creates a meaningful
13801         // merge.
13802         if (NumElem < 2) {
13803           // We know that candidate stores are in order and of correct
13804           // shape. While there is no mergeable sequence from the
13805           // beginning one may start later in the sequence. The only
13806           // reason a merge of size N could have failed where another of
13807           // the same size would not have, is if the alignment has
13808           // improved or we've dropped a non-zero value. Drop as many
13809           // candidates as we can here.
13810           unsigned NumSkip = 1;
13811           while (
13812               (NumSkip < NumConsecutiveStores) &&
13813               (NumSkip < FirstZeroAfterNonZero) &&
13814               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13815             NumSkip++;
13816 
13817           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13818           NumConsecutiveStores -= NumSkip;
13819           continue;
13820         }
13821 
13822         // Check that we can merge these candidates without causing a cycle.
13823         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
13824                                                       RootNode)) {
13825           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13826           NumConsecutiveStores -= NumElem;
13827           continue;
13828         }
13829 
13830         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
13831                                               UseVector, LastIntegerTrunc);
13832 
13833         // Remove merged stores for next iteration.
13834         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13835         NumConsecutiveStores -= NumElem;
13836       }
13837       continue;
13838     }
13839 
13840     // When extracting multiple vector elements, try to store them
13841     // in one vector store rather than a sequence of scalar stores.
13842     if (IsExtractVecSrc) {
13843       // Loop on Consecutive Stores on success.
13844       while (NumConsecutiveStores >= 2) {
13845         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13846         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13847         unsigned FirstStoreAlign = FirstInChain->getAlignment();
13848         unsigned NumStoresToMerge = 1;
13849         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13850           // Find a legal type for the vector store.
13851           unsigned Elts = (i + 1) * NumMemElts;
13852           EVT Ty =
13853               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13854           bool IsFast;
13855 
13856           // Break early when size is too large to be legal.
13857           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
13858             break;
13859 
13860           if (TLI.isTypeLegal(Ty) &&
13861               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
13862               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
13863                                      FirstStoreAlign, &IsFast) &&
13864               IsFast)
13865             NumStoresToMerge = i + 1;
13866         }
13867 
13868         // Check if we found a legal integer type creating a meaningful
13869         // merge.
13870         if (NumStoresToMerge < 2) {
13871           // We know that candidate stores are in order and of correct
13872           // shape. While there is no mergeable sequence from the
13873           // beginning one may start later in the sequence. The only
13874           // reason a merge of size N could have failed where another of
13875           // the same size would not have, is if the alignment has
13876           // improved. Drop as many candidates as we can here.
13877           unsigned NumSkip = 1;
13878           while (
13879               (NumSkip < NumConsecutiveStores) &&
13880               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
13881             NumSkip++;
13882 
13883           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
13884           NumConsecutiveStores -= NumSkip;
13885           continue;
13886         }
13887 
13888         // Check that we can merge these candidates without causing a cycle.
13889         if (!checkMergeStoreCandidatesForDependencies(
13890                 StoreNodes, NumStoresToMerge, RootNode)) {
13891           StoreNodes.erase(StoreNodes.begin(),
13892                            StoreNodes.begin() + NumStoresToMerge);
13893           NumConsecutiveStores -= NumStoresToMerge;
13894           continue;
13895         }
13896 
13897         RV |= MergeStoresOfConstantsOrVecElts(
13898             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
13899 
13900         StoreNodes.erase(StoreNodes.begin(),
13901                          StoreNodes.begin() + NumStoresToMerge);
13902         NumConsecutiveStores -= NumStoresToMerge;
13903       }
13904       continue;
13905     }
13906 
13907     // Below we handle the case of multiple consecutive stores that
13908     // come from multiple consecutive loads. We merge them into a single
13909     // wide load and a single wide store.
13910 
13911     // Look for load nodes which are used by the stored values.
13912     SmallVector<MemOpLink, 8> LoadNodes;
13913 
13914     // Find acceptable loads. Loads need to have the same chain (token factor),
13915     // must not be zext, volatile, indexed, and they must be consecutive.
13916     BaseIndexOffset LdBasePtr;
13917 
13918     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
13919       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13920       SDValue Val = peekThroughBitcast(St->getValue());
13921       LoadSDNode *Ld = cast<LoadSDNode>(Val);
13922 
13923       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
13924       // If this is not the first ptr that we check.
13925       int64_t LdOffset = 0;
13926       if (LdBasePtr.getBase().getNode()) {
13927         // The base ptr must be the same.
13928         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
13929           break;
13930       } else {
13931         // Check that all other base pointers are the same as this one.
13932         LdBasePtr = LdPtr;
13933       }
13934 
13935       // We found a potential memory operand to merge.
13936       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
13937     }
13938 
13939     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
13940       // If we have load/store pair instructions and we only have two values,
13941       // don't bother merging.
13942       unsigned RequiredAlignment;
13943       if (LoadNodes.size() == 2 &&
13944           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
13945           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
13946         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
13947         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
13948         break;
13949       }
13950       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13951       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
13952       unsigned FirstStoreAlign = FirstInChain->getAlignment();
13953       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
13954       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
13955       unsigned FirstLoadAlign = FirstLoad->getAlignment();
13956 
13957       // Scan the memory operations on the chain and find the first
13958       // non-consecutive load memory address. These variables hold the index in
13959       // the store node array.
13960 
13961       unsigned LastConsecutiveLoad = 1;
13962 
13963       // This variable refers to the size and not index in the array.
13964       unsigned LastLegalVectorType = 1;
13965       unsigned LastLegalIntegerType = 1;
13966       bool isDereferenceable = true;
13967       bool DoIntegerTruncate = false;
13968       StartAddress = LoadNodes[0].OffsetFromBase;
13969       SDValue FirstChain = FirstLoad->getChain();
13970       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
13971         // All loads must share the same chain.
13972         if (LoadNodes[i].MemNode->getChain() != FirstChain)
13973           break;
13974 
13975         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
13976         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
13977           break;
13978         LastConsecutiveLoad = i;
13979 
13980         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
13981           isDereferenceable = false;
13982 
13983         // Find a legal type for the vector store.
13984         unsigned Elts = (i + 1) * NumMemElts;
13985         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
13986 
13987         // Break early when size is too large to be legal.
13988         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
13989           break;
13990 
13991         bool IsFastSt, IsFastLd;
13992         if (TLI.isTypeLegal(StoreTy) &&
13993             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
13994             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13995                                    FirstStoreAlign, &IsFastSt) &&
13996             IsFastSt &&
13997             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
13998                                    FirstLoadAlign, &IsFastLd) &&
13999             IsFastLd) {
14000           LastLegalVectorType = i + 1;
14001         }
14002 
14003         // Find a legal type for the integer store.
14004         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14005         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14006         if (TLI.isTypeLegal(StoreTy) &&
14007             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14008             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14009                                    FirstStoreAlign, &IsFastSt) &&
14010             IsFastSt &&
14011             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14012                                    FirstLoadAlign, &IsFastLd) &&
14013             IsFastLd) {
14014           LastLegalIntegerType = i + 1;
14015           DoIntegerTruncate = false;
14016           // Or check whether a truncstore and extload is legal.
14017         } else if (TLI.getTypeAction(Context, StoreTy) ==
14018                    TargetLowering::TypePromoteInteger) {
14019           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14020           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14021               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14022               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14023                                  StoreTy) &&
14024               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14025                                  StoreTy) &&
14026               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14027               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14028                                      FirstStoreAlign, &IsFastSt) &&
14029               IsFastSt &&
14030               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14031                                      FirstLoadAlign, &IsFastLd) &&
14032               IsFastLd) {
14033             LastLegalIntegerType = i + 1;
14034             DoIntegerTruncate = true;
14035           }
14036         }
14037       }
14038 
14039       // Only use vector types if the vector type is larger than the integer
14040       // type. If they are the same, use integers.
14041       bool UseVectorTy =
14042           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14043       unsigned LastLegalType =
14044           std::max(LastLegalVectorType, LastLegalIntegerType);
14045 
14046       // We add +1 here because the LastXXX variables refer to location while
14047       // the NumElem refers to array/index size.
14048       unsigned NumElem =
14049           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14050       NumElem = std::min(LastLegalType, NumElem);
14051 
14052       if (NumElem < 2) {
14053         // We know that candidate stores are in order and of correct
14054         // shape. While there is no mergeable sequence from the
14055         // beginning one may start later in the sequence. The only
14056         // reason a merge of size N could have failed where another of
14057         // the same size would not have is if the alignment or either
14058         // the load or store has improved. Drop as many candidates as we
14059         // can here.
14060         unsigned NumSkip = 1;
14061         while ((NumSkip < LoadNodes.size()) &&
14062                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14063                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14064           NumSkip++;
14065         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14066         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14067         NumConsecutiveStores -= NumSkip;
14068         continue;
14069       }
14070 
14071       // Check that we can merge these candidates without causing a cycle.
14072       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14073                                                     RootNode)) {
14074         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14075         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14076         NumConsecutiveStores -= NumElem;
14077         continue;
14078       }
14079 
14080       // Find if it is better to use vectors or integers to load and store
14081       // to memory.
14082       EVT JointMemOpVT;
14083       if (UseVectorTy) {
14084         // Find a legal type for the vector store.
14085         unsigned Elts = NumElem * NumMemElts;
14086         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14087       } else {
14088         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14089         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14090       }
14091 
14092       SDLoc LoadDL(LoadNodes[0].MemNode);
14093       SDLoc StoreDL(StoreNodes[0].MemNode);
14094 
14095       // The merged loads are required to have the same incoming chain, so
14096       // using the first's chain is acceptable.
14097 
14098       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14099       AddToWorklist(NewStoreChain.getNode());
14100 
14101       MachineMemOperand::Flags MMOFlags =
14102           isDereferenceable ? MachineMemOperand::MODereferenceable
14103                             : MachineMemOperand::MONone;
14104 
14105       SDValue NewLoad, NewStore;
14106       if (UseVectorTy || !DoIntegerTruncate) {
14107         NewLoad =
14108             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
14109                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14110                         FirstLoadAlign, MMOFlags);
14111         NewStore = DAG.getStore(
14112             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
14113             FirstInChain->getPointerInfo(), FirstStoreAlign);
14114       } else { // This must be the truncstore/extload case
14115         EVT ExtendedTy =
14116             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
14117         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
14118                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
14119                                  FirstLoad->getPointerInfo(), JointMemOpVT,
14120                                  FirstLoadAlign, MMOFlags);
14121         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
14122                                      FirstInChain->getBasePtr(),
14123                                      FirstInChain->getPointerInfo(),
14124                                      JointMemOpVT, FirstInChain->getAlignment(),
14125                                      FirstInChain->getMemOperand()->getFlags());
14126       }
14127 
14128       // Transfer chain users from old loads to the new load.
14129       for (unsigned i = 0; i < NumElem; ++i) {
14130         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
14131         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
14132                                       SDValue(NewLoad.getNode(), 1));
14133       }
14134 
14135       // Replace the all stores with the new store. Recursively remove
14136       // corresponding value if its no longer used.
14137       for (unsigned i = 0; i < NumElem; ++i) {
14138         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
14139         CombineTo(StoreNodes[i].MemNode, NewStore);
14140         if (Val.getNode()->use_empty())
14141           recursivelyDeleteUnusedNodes(Val.getNode());
14142       }
14143 
14144       RV = true;
14145       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14146       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14147       NumConsecutiveStores -= NumElem;
14148     }
14149   }
14150   return RV;
14151 }
14152 
14153 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
14154   SDLoc SL(ST);
14155   SDValue ReplStore;
14156 
14157   // Replace the chain to avoid dependency.
14158   if (ST->isTruncatingStore()) {
14159     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
14160                                   ST->getBasePtr(), ST->getMemoryVT(),
14161                                   ST->getMemOperand());
14162   } else {
14163     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
14164                              ST->getMemOperand());
14165   }
14166 
14167   // Create token to keep both nodes around.
14168   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
14169                               MVT::Other, ST->getChain(), ReplStore);
14170 
14171   // Make sure the new and old chains are cleaned up.
14172   AddToWorklist(Token.getNode());
14173 
14174   // Don't add users to work list.
14175   return CombineTo(ST, Token, false);
14176 }
14177 
14178 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
14179   SDValue Value = ST->getValue();
14180   if (Value.getOpcode() == ISD::TargetConstantFP)
14181     return SDValue();
14182 
14183   SDLoc DL(ST);
14184 
14185   SDValue Chain = ST->getChain();
14186   SDValue Ptr = ST->getBasePtr();
14187 
14188   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
14189 
14190   // NOTE: If the original store is volatile, this transform must not increase
14191   // the number of stores.  For example, on x86-32 an f64 can be stored in one
14192   // processor operation but an i64 (which is not legal) requires two.  So the
14193   // transform should not be done in this case.
14194 
14195   SDValue Tmp;
14196   switch (CFP->getSimpleValueType(0).SimpleTy) {
14197   default:
14198     llvm_unreachable("Unknown FP type");
14199   case MVT::f16:    // We don't do this for these yet.
14200   case MVT::f80:
14201   case MVT::f128:
14202   case MVT::ppcf128:
14203     return SDValue();
14204   case MVT::f32:
14205     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
14206         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14207       ;
14208       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
14209                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
14210                             MVT::i32);
14211       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
14212     }
14213 
14214     return SDValue();
14215   case MVT::f64:
14216     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
14217          !ST->isVolatile()) ||
14218         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
14219       ;
14220       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
14221                             getZExtValue(), SDLoc(CFP), MVT::i64);
14222       return DAG.getStore(Chain, DL, Tmp,
14223                           Ptr, ST->getMemOperand());
14224     }
14225 
14226     if (!ST->isVolatile() &&
14227         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14228       // Many FP stores are not made apparent until after legalize, e.g. for
14229       // argument passing.  Since this is so common, custom legalize the
14230       // 64-bit integer store into two 32-bit stores.
14231       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
14232       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
14233       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
14234       if (DAG.getDataLayout().isBigEndian())
14235         std::swap(Lo, Hi);
14236 
14237       unsigned Alignment = ST->getAlignment();
14238       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14239       AAMDNodes AAInfo = ST->getAAInfo();
14240 
14241       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14242                                  ST->getAlignment(), MMOFlags, AAInfo);
14243       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14244                         DAG.getConstant(4, DL, Ptr.getValueType()));
14245       Alignment = MinAlign(Alignment, 4U);
14246       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
14247                                  ST->getPointerInfo().getWithOffset(4),
14248                                  Alignment, MMOFlags, AAInfo);
14249       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14250                          St0, St1);
14251     }
14252 
14253     return SDValue();
14254   }
14255 }
14256 
/// Main combine entry point for ISD::STORE nodes. Applies, in order: bitcast
/// store folding, dead-store elimination, alignment inference, FP<->int
/// load/store pairing, chain improvement, truncstore narrowing, redundant /
/// overwritten store removal, truncating-store folding, consecutive-store
/// merging, indexed-store formation, FP-constant store rewriting, merged-value
/// splitting, and store-width reduction.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // Only fold when the source-typed store is legal (or legalization hasn't
    // run), the target considers it beneficial, and the access stays fast at
    // the original alignment.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // The chain may have been updated by findBetterNeighborChains; reload it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If this store fully overwrites its preceding store to the same
      // location and no other node is chained to that store, we can
      // effectively drop the earlier store. Do not remove stores to undef as
      // they may be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Split a merged (or/shl-combined) value store into two narrower stores if
  // the target prefers that.
  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  // Last resort: try narrowing the store width.
  return ReduceLoadOpStoreWidth(N);
}
14432 
14433 /// For the instruction sequence of store below, F and I values
14434 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
14436 /// which can remove the bitwise instructions or sink them to colder places.
14437 ///
14438 ///   (store (or (zext (bitcast F to i32) to i64),
14439 ///              (shl (zext I to i64), 32)), addr)  -->
14440 ///   (store F, addr) and (store I, addr+4)
14441 ///
14442 /// Similarly, splitting for other merged store can also be beneficial, like:
14443 /// For pair of {i32, i32}, i64 store --> two i32 stores.
14444 /// For pair of {i32, i16}, i64 store --> two i32 stores.
14445 /// For pair of {i16, i16}, i32 store --> two i16 stores.
14446 /// For pair of {i16, i8},  i32 store --> two i16 stores.
14447 /// For pair of {i8, i8},   i16 store --> two i8 stores.
14448 ///
14449 /// We allow each target to determine specifically which kind of splitting is
14450 /// supported.
14451 ///
14452 /// The store patterns are commonly seen from the simple code snippet below
14453 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
14454 ///   void goo(const std::pair<int, float> &);
14455 ///   hoo() {
14456 ///     ...
14457 ///     goo(std::make_pair(tmp, ftmp));
14458 ///     ...
14459 ///   }
14460 ///
14461 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
14462   if (OptLevel == CodeGenOpt::None)
14463     return SDValue();
14464 
14465   SDValue Val = ST->getValue();
14466   SDLoc DL(ST);
14467 
14468   // Match OR operand.
14469   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
14470     return SDValue();
14471 
14472   // Match SHL operand and get Lower and Higher parts of Val.
14473   SDValue Op1 = Val.getOperand(0);
14474   SDValue Op2 = Val.getOperand(1);
14475   SDValue Lo, Hi;
14476   if (Op1.getOpcode() != ISD::SHL) {
14477     std::swap(Op1, Op2);
14478     if (Op1.getOpcode() != ISD::SHL)
14479       return SDValue();
14480   }
14481   Lo = Op2;
14482   Hi = Op1.getOperand(0);
14483   if (!Op1.hasOneUse())
14484     return SDValue();
14485 
14486   // Match shift amount to HalfValBitSize.
14487   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
14488   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
14489   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
14490     return SDValue();
14491 
14492   // Lo and Hi are zero-extended from int with size less equal than 32
14493   // to i64.
14494   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
14495       !Lo.getOperand(0).getValueType().isScalarInteger() ||
14496       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
14497       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
14498       !Hi.getOperand(0).getValueType().isScalarInteger() ||
14499       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
14500     return SDValue();
14501 
14502   // Use the EVT of low and high parts before bitcast as the input
14503   // of target query.
14504   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
14505                   ? Lo.getOperand(0).getValueType()
14506                   : Lo.getValueType();
14507   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
14508                    ? Hi.getOperand(0).getValueType()
14509                    : Hi.getValueType();
14510   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
14511     return SDValue();
14512 
14513   // Start to split store.
14514   unsigned Alignment = ST->getAlignment();
14515   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14516   AAMDNodes AAInfo = ST->getAAInfo();
14517 
14518   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
14519   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
14520   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
14521   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
14522 
14523   SDValue Chain = ST->getChain();
14524   SDValue Ptr = ST->getBasePtr();
14525   // Lower value store.
14526   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14527                              ST->getAlignment(), MMOFlags, AAInfo);
14528   Ptr =
14529       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14530                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
14531   // Higher value store.
14532   SDValue St1 =
14533       DAG.getStore(St0, DL, Hi, Ptr,
14534                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
14535                    Alignment / 2, MMOFlags, AAInfo);
14536   return St1;
14537 }
14538 
14539 /// Convert a disguised subvector insertion into a shuffle:
14540 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
14541 /// bitcast(shuffle (bitcast V), (extended X), Mask)
14542 /// Note: We do not use an insert_subvector node because that requires a legal
14543 /// subvector type.
14544 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
14545   SDValue InsertVal = N->getOperand(1);
14546   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
14547       !InsertVal.getOperand(0).getValueType().isVector())
14548     return SDValue();
14549 
14550   SDValue SubVec = InsertVal.getOperand(0);
14551   SDValue DestVec = N->getOperand(0);
14552   EVT SubVecVT = SubVec.getValueType();
14553   EVT VT = DestVec.getValueType();
14554   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
14555   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
14556   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
14557 
14558   // Step 1: Create a shuffle mask that implements this insert operation. The
14559   // vector that we are inserting into will be operand 0 of the shuffle, so
14560   // those elements are just 'i'. The inserted subvector is in the first
14561   // positions of operand 1 of the shuffle. Example:
14562   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
14563   SmallVector<int, 16> Mask(NumMaskVals);
14564   for (unsigned i = 0; i != NumMaskVals; ++i) {
14565     if (i / NumSrcElts == InsIndex)
14566       Mask[i] = (i % NumSrcElts) + NumMaskVals;
14567     else
14568       Mask[i] = i;
14569   }
14570 
14571   // Bail out if the target can not handle the shuffle we want to create.
14572   EVT SubVecEltVT = SubVecVT.getVectorElementType();
14573   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
14574   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
14575     return SDValue();
14576 
14577   // Step 2: Create a wide vector from the inserted source vector by appending
14578   // undefined elements. This is the same size as our destination vector.
14579   SDLoc DL(N);
14580   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
14581   ConcatOps[0] = SubVec;
14582   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
14583 
14584   // Step 3: Shuffle in the padded subvector.
14585   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
14586   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
14587   AddToWorklist(PaddedSubV.getNode());
14588   AddToWorklist(DestVecBC.getNode());
14589   AddToWorklist(Shuf.getNode());
14590   return DAG.getBitcast(VT, Shuf);
14591 }
14592 
14593 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
14594   SDValue InVec = N->getOperand(0);
14595   SDValue InVal = N->getOperand(1);
14596   SDValue EltNo = N->getOperand(2);
14597   SDLoc DL(N);
14598 
14599   // If the inserted element is an UNDEF, just use the input vector.
14600   if (InVal.isUndef())
14601     return InVec;
14602 
14603   EVT VT = InVec.getValueType();
14604 
14605   // Remove redundant insertions:
14606   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
14607   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14608       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
14609     return InVec;
14610 
14611   // We must know which element is being inserted for folds below here.
14612   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
14613   if (!IndexC)
14614     return SDValue();
14615   unsigned Elt = IndexC->getZExtValue();
14616 
14617   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
14618     return Shuf;
14619 
14620   // Canonicalize insert_vector_elt dag nodes.
14621   // Example:
14622   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
14623   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
14624   //
14625   // Do this only if the child insert_vector node has one use; also
14626   // do this only if indices are both constants and Idx1 < Idx0.
14627   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
14628       && isa<ConstantSDNode>(InVec.getOperand(2))) {
14629     unsigned OtherElt = InVec.getConstantOperandVal(2);
14630     if (Elt < OtherElt) {
14631       // Swap nodes.
14632       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
14633                                   InVec.getOperand(0), InVal, EltNo);
14634       AddToWorklist(NewOp.getNode());
14635       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
14636                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
14637     }
14638   }
14639 
14640   // If we can't generate a legal BUILD_VECTOR, exit
14641   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
14642     return SDValue();
14643 
14644   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
14645   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
14646   // vector elements.
14647   SmallVector<SDValue, 8> Ops;
14648   // Do not combine these two vectors if the output vector will not replace
14649   // the input vector.
14650   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
14651     Ops.append(InVec.getNode()->op_begin(),
14652                InVec.getNode()->op_end());
14653   } else if (InVec.isUndef()) {
14654     unsigned NElts = VT.getVectorNumElements();
14655     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
14656   } else {
14657     return SDValue();
14658   }
14659 
14660   // Insert the element
14661   if (Elt < Ops.size()) {
14662     // All the operands of BUILD_VECTOR must have the same type;
14663     // we enforce that here.
14664     EVT OpVT = Ops[0].getValueType();
14665     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
14666   }
14667 
14668   // Return the new vector
14669   return DAG.getBuildVector(VT, DL, Ops);
14670 }
14671 
14672 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
14673     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
14674   assert(!OriginalLoad->isVolatile());
14675 
14676   EVT ResultVT = EVE->getValueType(0);
14677   EVT VecEltVT = InVecVT.getVectorElementType();
14678   unsigned Align = OriginalLoad->getAlignment();
14679   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
14680       VecEltVT.getTypeForEVT(*DAG.getContext()));
14681 
14682   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
14683     return SDValue();
14684 
14685   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
14686     ISD::NON_EXTLOAD : ISD::EXTLOAD;
14687   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
14688     return SDValue();
14689 
14690   Align = NewAlign;
14691 
14692   SDValue NewPtr = OriginalLoad->getBasePtr();
14693   SDValue Offset;
14694   EVT PtrType = NewPtr.getValueType();
14695   MachinePointerInfo MPI;
14696   SDLoc DL(EVE);
14697   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
14698     int Elt = ConstEltNo->getZExtValue();
14699     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
14700     Offset = DAG.getConstant(PtrOff, DL, PtrType);
14701     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
14702   } else {
14703     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
14704     Offset = DAG.getNode(
14705         ISD::MUL, DL, PtrType, Offset,
14706         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
14707     MPI = OriginalLoad->getPointerInfo();
14708   }
14709   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
14710 
14711   // The replacement we need to do here is a little tricky: we need to
14712   // replace an extractelement of a load with a load.
14713   // Use ReplaceAllUsesOfValuesWith to do the replacement.
14714   // Note that this replacement assumes that the extractvalue is the only
14715   // use of the load; that's okay because we don't want to perform this
14716   // transformation in other cases anyway.
14717   SDValue Load;
14718   SDValue Chain;
14719   if (ResultVT.bitsGT(VecEltVT)) {
14720     // If the result type of vextract is wider than the load, then issue an
14721     // extending load instead.
14722     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
14723                                                   VecEltVT)
14724                                    ? ISD::ZEXTLOAD
14725                                    : ISD::EXTLOAD;
14726     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
14727                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
14728                           Align, OriginalLoad->getMemOperand()->getFlags(),
14729                           OriginalLoad->getAAInfo());
14730     Chain = Load.getValue(1);
14731   } else {
14732     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
14733                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
14734                        OriginalLoad->getAAInfo());
14735     Chain = Load.getValue(1);
14736     if (ResultVT.bitsLT(VecEltVT))
14737       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
14738     else
14739       Load = DAG.getBitcast(ResultVT, Load);
14740   }
14741   WorklistRemover DeadNodes(*this);
14742   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
14743   SDValue To[] = { Load, Chain };
14744   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
14745   // Since we're explicitly calling ReplaceAllUses, add the new node to the
14746   // worklist explicitly as well.
14747   AddToWorklist(Load.getNode());
14748   AddUsersToWorklist(Load.getNode()); // Add users too
14749   // Make sure to revisit this node to clean it up; it will usually be dead.
14750   AddToWorklist(EVE);
14751   ++OpsNarrowed;
14752   return SDValue(EVE, 0);
14753 }
14754 
/// Combine EXTRACT_VECTOR_ELT nodes: fold extracts of SCALAR_TO_VECTOR,
/// BUILD_VECTOR, INSERT_VECTOR_ELT, bitcasts and shuffles down to the
/// underlying scalar value, and narrow an extract of a one-use load into a
/// scalar load of the single element.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  // Extracting from undef is undef.
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt of out-of-bounds element -> UNDEF
  if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
    return DAG.getUNDEF(NVT);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
  // The low-order element on little-endian (or the high-order element on
  // big-endian) of a scalar-to-vector bitcast is just a truncate of the
  // scalar source.
  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      // The shuffle selects a BUILD_VECTOR operand directly; extract that
      // scalar (coercing integer types if they differ).
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // State for the load-narrowing combines below: ExtVT/LVT track the
  // in-memory element type; BCNumEltsChanged records whether a bitcast
  // changed the element count (which invalidates shuffle-mask indexing).
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  // Look through a one-use bitcast of the source vector.
  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // Variable-index case: allowed pre-legalization when the index does not
  // depend on the load (which would create a cycle through the chain).
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Find the load feeding this extract, either directly, through a
    // scalar_to_vector, or through a vector_shuffle (remapping the index).
    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Remap the extract index into the selected shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
14974 
14975 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
14976 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
14977   // We perform this optimization post type-legalization because
14978   // the type-legalizer often scalarizes integer-promoted vectors.
14979   // Performing this optimization before may create bit-casts which
14980   // will be type-legalized to complex code sequences.
14981   // We perform this optimization only before the operation legalizer because we
14982   // may introduce illegal operations.
14983   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
14984     return SDValue();
14985 
14986   unsigned NumInScalars = N->getNumOperands();
14987   SDLoc DL(N);
14988   EVT VT = N->getValueType(0);
14989 
14990   // Check to see if this is a BUILD_VECTOR of a bunch of values
14991   // which come from any_extend or zero_extend nodes. If so, we can create
14992   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
14993   // optimizations. We do not handle sign-extend because we can't fill the sign
14994   // using shuffles.
14995   EVT SourceType = MVT::Other;
14996   bool AllAnyExt = true;
14997 
14998   for (unsigned i = 0; i != NumInScalars; ++i) {
14999     SDValue In = N->getOperand(i);
15000     // Ignore undef inputs.
15001     if (In.isUndef()) continue;
15002 
15003     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
15004     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15005 
15006     // Abort if the element is not an extension.
15007     if (!ZeroExt && !AnyExt) {
15008       SourceType = MVT::Other;
15009       break;
15010     }
15011 
15012     // The input is a ZeroExt or AnyExt. Check the original type.
15013     EVT InTy = In.getOperand(0).getValueType();
15014 
15015     // Check that all of the widened source types are the same.
15016     if (SourceType == MVT::Other)
15017       // First time.
15018       SourceType = InTy;
15019     else if (InTy != SourceType) {
15020       // Multiple income types. Abort.
15021       SourceType = MVT::Other;
15022       break;
15023     }
15024 
15025     // Check if all of the extends are ANY_EXTENDs.
15026     AllAnyExt &= AnyExt;
15027   }
15028 
15029   // In order to have valid types, all of the inputs must be extended from the
15030   // same source type and all of the inputs must be any or zero extend.
15031   // Scalar sizes must be a power of two.
15032   EVT OutScalarTy = VT.getScalarType();
15033   bool ValidTypes = SourceType != MVT::Other &&
15034                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
15035                  isPowerOf2_32(SourceType.getSizeInBits());
15036 
15037   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
15038   // turn into a single shuffle instruction.
15039   if (!ValidTypes)
15040     return SDValue();
15041 
15042   bool isLE = DAG.getDataLayout().isLittleEndian();
15043   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
15044   assert(ElemRatio > 1 && "Invalid element size ratio");
15045   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
15046                                DAG.getConstant(0, DL, SourceType);
15047 
15048   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
15049   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
15050 
15051   // Populate the new build_vector
15052   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15053     SDValue Cast = N->getOperand(i);
15054     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
15055             Cast.getOpcode() == ISD::ZERO_EXTEND ||
15056             Cast.isUndef()) && "Invalid cast opcode");
15057     SDValue In;
15058     if (Cast.isUndef())
15059       In = DAG.getUNDEF(SourceType);
15060     else
15061       In = Cast->getOperand(0);
15062     unsigned Index = isLE ? (i * ElemRatio) :
15063                             (i * ElemRatio + (ElemRatio - 1));
15064 
15065     assert(Index < Ops.size() && "Invalid index");
15066     Ops[Index] = In;
15067   }
15068 
15069   // The type of the new BUILD_VECTOR node.
15070   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
15071   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
15072          "Invalid vector size");
15073   // Check if the new vector type is legal.
15074   if (!isTypeLegal(VecVT) ||
15075       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
15076        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
15077     return SDValue();
15078 
15079   // Make the new BUILD_VECTOR.
15080   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
15081 
15082   // The new BUILD_VECTOR node has the potential to be further optimized.
15083   AddToWorklist(BV.getNode());
15084   // Bitcast to the desired type.
15085   return DAG.getBitcast(VT, BV);
15086 }
15087 
15088 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
15089   EVT VT = N->getValueType(0);
15090 
15091   unsigned NumInScalars = N->getNumOperands();
15092   SDLoc DL(N);
15093 
15094   EVT SrcVT = MVT::Other;
15095   unsigned Opcode = ISD::DELETED_NODE;
15096   unsigned NumDefs = 0;
15097 
15098   for (unsigned i = 0; i != NumInScalars; ++i) {
15099     SDValue In = N->getOperand(i);
15100     unsigned Opc = In.getOpcode();
15101 
15102     if (Opc == ISD::UNDEF)
15103       continue;
15104 
15105     // If all scalar values are floats and converted from integers.
15106     if (Opcode == ISD::DELETED_NODE &&
15107         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
15108       Opcode = Opc;
15109     }
15110 
15111     if (Opc != Opcode)
15112       return SDValue();
15113 
15114     EVT InVT = In.getOperand(0).getValueType();
15115 
15116     // If all scalar values are typed differently, bail out. It's chosen to
15117     // simplify BUILD_VECTOR of integer types.
15118     if (SrcVT == MVT::Other)
15119       SrcVT = InVT;
15120     if (SrcVT != InVT)
15121       return SDValue();
15122     NumDefs++;
15123   }
15124 
15125   // If the vector has just one element defined, it's not worth to fold it into
15126   // a vectorized one.
15127   if (NumDefs < 2)
15128     return SDValue();
15129 
15130   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
15131          && "Should only handle conversion from integer to float.");
15132   assert(SrcVT != MVT::Other && "Cannot determine source type!");
15133 
15134   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
15135 
15136   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
15137     return SDValue();
15138 
15139   // Just because the floating-point vector type is legal does not necessarily
15140   // mean that the corresponding integer vector type is.
15141   if (!isTypeLegal(NVT))
15142     return SDValue();
15143 
15144   SmallVector<SDValue, 8> Opnds;
15145   for (unsigned i = 0; i != NumInScalars; ++i) {
15146     SDValue In = N->getOperand(i);
15147 
15148     if (In.isUndef())
15149       Opnds.push_back(DAG.getUNDEF(SrcVT));
15150     else
15151       Opnds.push_back(In.getOperand(0));
15152   }
15153   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
15154   AddToWorklist(BV.getNode());
15155 
15156   return DAG.getNode(Opcode, DL, VT, BV);
15157 }
15158 
15159 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
15160                                            ArrayRef<int> VectorMask,
15161                                            SDValue VecIn1, SDValue VecIn2,
15162                                            unsigned LeftIdx) {
15163   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15164   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
15165 
15166   EVT VT = N->getValueType(0);
15167   EVT InVT1 = VecIn1.getValueType();
15168   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
15169 
15170   unsigned Vec2Offset = 0;
15171   unsigned NumElems = VT.getVectorNumElements();
15172   unsigned ShuffleNumElems = NumElems;
15173 
15174   // In case both the input vectors are extracted from same base
15175   // vector we do not need extra addend (Vec2Offset) while
15176   // computing shuffle mask.
15177   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15178       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15179       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
15180     Vec2Offset = InVT1.getVectorNumElements();
15181 
15182   // We can't generate a shuffle node with mismatched input and output types.
15183   // Try to make the types match the type of the output.
15184   if (InVT1 != VT || InVT2 != VT) {
15185     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
15186       // If the output vector length is a multiple of both input lengths,
15187       // we can concatenate them and pad the rest with undefs.
15188       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
15189       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
15190       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
15191       ConcatOps[0] = VecIn1;
15192       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
15193       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15194       VecIn2 = SDValue();
15195     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
15196       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
15197         return SDValue();
15198 
15199       if (!VecIn2.getNode()) {
15200         // If we only have one input vector, and it's twice the size of the
15201         // output, split it in two.
15202         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
15203                              DAG.getConstant(NumElems, DL, IdxTy));
15204         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
15205         // Since we now have shorter input vectors, adjust the offset of the
15206         // second vector's start.
15207         Vec2Offset = NumElems;
15208       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
15209         // VecIn1 is wider than the output, and we have another, possibly
15210         // smaller input. Pad the smaller input with undefs, shuffle at the
15211         // input vector width, and extract the output.
15212         // The shuffle type is different than VT, so check legality again.
15213         if (LegalOperations &&
15214             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
15215           return SDValue();
15216 
15217         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
15218         // lower it back into a BUILD_VECTOR. So if the inserted type is
15219         // illegal, don't even try.
15220         if (InVT1 != InVT2) {
15221           if (!TLI.isTypeLegal(InVT2))
15222             return SDValue();
15223           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
15224                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
15225         }
15226         ShuffleNumElems = NumElems * 2;
15227       } else {
15228         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
15229         // than VecIn1. We can't handle this for now - this case will disappear
15230         // when we start sorting the vectors by type.
15231         return SDValue();
15232       }
15233     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
15234                InVT1.getSizeInBits() == VT.getSizeInBits()) {
15235       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
15236       ConcatOps[0] = VecIn2;
15237       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15238     } else {
15239       // TODO: Support cases where the length mismatch isn't exactly by a
15240       // factor of 2.
15241       // TODO: Move this check upwards, so that if we have bad type
15242       // mismatches, we don't create any DAG nodes.
15243       return SDValue();
15244     }
15245   }
15246 
15247   // Initialize mask to undef.
15248   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
15249 
15250   // Only need to run up to the number of elements actually used, not the
15251   // total number of elements in the shuffle - if we are shuffling a wider
15252   // vector, the high lanes should be set to undef.
15253   for (unsigned i = 0; i != NumElems; ++i) {
15254     if (VectorMask[i] <= 0)
15255       continue;
15256 
15257     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
15258     if (VectorMask[i] == (int)LeftIdx) {
15259       Mask[i] = ExtIndex;
15260     } else if (VectorMask[i] == (int)LeftIdx + 1) {
15261       Mask[i] = Vec2Offset + ExtIndex;
15262     }
15263   }
15264 
15265   // The type the input vectors may have changed above.
15266   InVT1 = VecIn1.getValueType();
15267 
15268   // If we already have a VecIn2, it should have the same type as VecIn1.
15269   // If we don't, get an undef/zero vector of the appropriate type.
15270   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
15271   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
15272 
15273   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
15274   if (ShuffleNumElems > NumElems)
15275     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
15276 
15277   return Shuffle;
15278 }
15279 
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is a placeholder for the zero vector, so that real input vectors
  // get the positive numbers the VectorMask encoding expects.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    // An out-of-range extract index would make the shuffle mask meaningless.
    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  // (VecIn always holds the zero-vector placeholder, hence "< 2".)
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    // Record each used extract index and the largest one seen.
    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    // Only split when the source is much wider than the result
    // (NumElems * 2 < NearestPow2), so halving it pays off.
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        // Replace the single wide input with its two halves; elements below
        // SplitSize come from VecIn1, the rest from VecIn2.
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // Pair up real inputs (numbered from 1); the last pair may be missing
    // its right-hand vector.
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  // Zero-vector users point at the zero constant appended last; pair (2k+1,
  // 2k+2) of the original numbering collapsed into shuffle k.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    // Pad odd levels with an undef so every node has a sibling to blend with.
    // (Index CurSize is in bounds: the list never shrinks below its initial,
    // even size.)
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      // Elements owned by the left child keep their lane; elements owned by
      // the right child come from the second shuffle operand (lane+NumElems).
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
15488 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend or any extend?
// TODO: Allow undef elements?
// TODO: Don't require the extracts to start at element 0.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  // After operation legalization the new ZERO_EXTEND/EXTRACT_SUBVECTOR nodes
  // might not be legal, so bail out early.
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue Op0 = N->getOperand(0);
  // Returns the constant extract index if Op matches
  // (zext (extract_vector_elt X, C)) with the same source vector X as
  // operand 0, or -1 otherwise. Safe to call for Op != Op0 only after Op0
  // itself has been validated (which the Offset check below guarantees).
  auto checkElem = [&](SDValue Op) -> int64_t {
    if (Op.getOpcode() == ISD::ZERO_EXTEND &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  int64_t Offset = checkElem(Op0);
  if (Offset < 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  // Extract NumElems elements starting at Offset (the first element's index)
  // and zero-extend the whole subvector at once.
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
}
15536 
15537 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
15538   EVT VT = N->getValueType(0);
15539 
15540   // A vector built entirely of undefs is undef.
15541   if (ISD::allOperandsUndef(N))
15542     return DAG.getUNDEF(VT);
15543 
15544   // If this is a splat of a bitcast from another vector, change to a
15545   // concat_vector.
15546   // For example:
15547   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
15548   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
15549   //
15550   // If X is a build_vector itself, the concat can become a larger build_vector.
15551   // TODO: Maybe this is useful for non-splat too?
15552   if (!LegalOperations) {
15553     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
15554       Splat = peekThroughBitcast(Splat);
15555       EVT SrcVT = Splat.getValueType();
15556       if (SrcVT.isVector()) {
15557         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
15558         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
15559                                      SrcVT.getVectorElementType(), NumElts);
15560         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
15561           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
15562           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
15563                                        NewVT, Ops);
15564           return DAG.getBitcast(VT, Concat);
15565         }
15566       }
15567     }
15568   }
15569 
15570   // Check if we can express BUILD VECTOR via subvector extract.
15571   if (!LegalTypes && (N->getNumOperands() > 1)) {
15572     SDValue Op0 = N->getOperand(0);
15573     auto checkElem = [&](SDValue Op) -> uint64_t {
15574       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
15575           (Op0.getOperand(0) == Op.getOperand(0)))
15576         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
15577           return CNode->getZExtValue();
15578       return -1;
15579     };
15580 
15581     int Offset = checkElem(Op0);
15582     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
15583       if (Offset + i != checkElem(N->getOperand(i))) {
15584         Offset = -1;
15585         break;
15586       }
15587     }
15588 
15589     if ((Offset == 0) &&
15590         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
15591       return Op0.getOperand(0);
15592     if ((Offset != -1) &&
15593         ((Offset % N->getValueType(0).getVectorNumElements()) ==
15594          0)) // IDX must be multiple of output size.
15595       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
15596                          Op0.getOperand(0), Op0.getOperand(1));
15597   }
15598 
15599   if (SDValue V = convertBuildVecZextToZext(N))
15600     return V;
15601 
15602   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
15603     return V;
15604 
15605   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
15606     return V;
15607 
15608   if (SDValue V = reduceBuildVecToShuffle(N))
15609     return V;
15610 
15611   return SDValue();
15612 }
15613 
15614 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
15615   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15616   EVT OpVT = N->getOperand(0).getValueType();
15617 
15618   // If the operands are legal vectors, leave them alone.
15619   if (TLI.isTypeLegal(OpVT))
15620     return SDValue();
15621 
15622   SDLoc DL(N);
15623   EVT VT = N->getValueType(0);
15624   SmallVector<SDValue, 8> Ops;
15625 
15626   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
15627   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
15628 
15629   // Keep track of what we encounter.
15630   bool AnyInteger = false;
15631   bool AnyFP = false;
15632   for (const SDValue &Op : N->ops()) {
15633     if (ISD::BITCAST == Op.getOpcode() &&
15634         !Op.getOperand(0).getValueType().isVector())
15635       Ops.push_back(Op.getOperand(0));
15636     else if (ISD::UNDEF == Op.getOpcode())
15637       Ops.push_back(ScalarUndef);
15638     else
15639       return SDValue();
15640 
15641     // Note whether we encounter an integer or floating point scalar.
15642     // If it's neither, bail out, it could be something weird like x86mmx.
15643     EVT LastOpVT = Ops.back().getValueType();
15644     if (LastOpVT.isFloatingPoint())
15645       AnyFP = true;
15646     else if (LastOpVT.isInteger())
15647       AnyInteger = true;
15648     else
15649       return SDValue();
15650   }
15651 
15652   // If any of the operands is a floating point scalar bitcast to a vector,
15653   // use floating point types throughout, and bitcast everything.
15654   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
15655   if (AnyFP) {
15656     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
15657     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
15658     if (AnyInteger) {
15659       for (SDValue &Op : Ops) {
15660         if (Op.getValueType() == SVT)
15661           continue;
15662         if (Op.isUndef())
15663           Op = ScalarUndef;
15664         else
15665           Op = DAG.getBitcast(SVT, Op);
15666       }
15667     }
15668   }
15669 
15670   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
15671                                VT.getSizeInBits() / SVT.getSizeInBits());
15672   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
15673 }
15674 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 become the (at most) two shuffle inputs; undef until claimed.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    Op = peekThroughBitcast(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    ExtVec = peekThroughBitcast(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The extract index must be constant to be turned into mask values.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    // The bitcast may have changed the element count while keeping the total
    // width; rescale the index so it counts result-type elements.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    // Lanes from SV1 are encoded with a +NumElts offset, as usual.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only emit the shuffle if the target can lower this mask.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
15755 
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0).getValueType().isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // SCALAR_TO_VECTOR only makes sense for int/FP scalars.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // An undef operand contributes NumElts undef scalars.
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer operands may be wider than MinVT; truncate each one.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef pieces match any source.
    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Operand i must extract the subvector starting at element i*PartNumElem
    // for the whole concat to be a no-op.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  // Every piece was the identity extract of SingleSource, so the concat just
  // rebuilds it.
  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
15913 
15914 /// If we are extracting a subvector produced by a wide binary operator with at
15915 /// at least one operand that was the result of a vector concatenation, then try
15916 /// to use the narrow vector operands directly to avoid the concatenation and
15917 /// extraction.
/// If we are extracting a subvector of a wide vector binary operator, try to
/// narrow the binop so it operates directly on half-width vectors:
///   extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
/// Currently limited to bitwise logic ops (AND/OR/XOR) where the wide vector
/// is exactly twice the size of the extracted type.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndex)
    return SDValue();

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  EVT VT = Extract->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
         "Extract index is not a multiple of the vector length.");
  if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  unsigned BOpcode = BinOp.getOpcode();
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // The binop must be a vector type, so we can chop it in half.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideBVT.getVectorNumElements() / 2);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // Peek through bitcasts of the binary operator operands if needed.
  SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
  SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
  bool ConcatL =
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
  bool ConcatR =
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
  if (!ConcatL && !ConcatR)
    return SDValue();

  // If one of the binop operands was not the result of a concat, we must
  // extract a half-sized operand for our new narrow binop. We can't just reuse
  // the original extract index operand because we may have bitcasted.
  // ConcatOpNum selects the low (0) or high (1) half of the wide vector;
  // ExtBOIdx is that half's starting element index in WideBVT units.
  unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  SDLoc DL(Extract);

  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
  // Note: the fallback extracts are taken from the original (pre-bitcast)
  // binop operands so the element counts line up with NarrowBVT.
  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(0),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(1),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
  return DAG.getBitcast(VT, NarrowBinOp);
}
15999 
/// If we are extracting a subvector from a wide vector load, convert to a
/// narrow load to eliminate the extraction:
/// (extract_subvector (load wide vector)) --> (load narrow vector)
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();

  // TODO: The one-use check is overly conservative. Check the cost of the
  // extract instead or remove that condition entirely.
  // Bail on non-simple loads (extending/volatile) and non-constant indices.
  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
      !ExtIdx)
    return SDValue();

  // The narrow load will be offset from the base address of the old load if
  // we are extracting from something besides index 0 (little-endian).
  EVT VT = Extract->getValueType(0);
  SDLoc DL(Extract);
  SDValue BaseAddr = Ld->getOperand(1);
  // Byte offset of the first extracted element within the wide load.
  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();

  // TODO: Use "BaseIndexOffset" to make this more effective.
  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
  MachineFunction &MF = DAG.getMachineFunction();
  // Derive the narrow load's memory operand from the wide load's, shifted by
  // Offset and shrunk to the extracted store size.
  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
                                                   VT.getStoreSize());
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
  // Keep the memory ordering of the original load intact for the new load.
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return NewLd;
}
16032 
16033 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
16034   EVT NVT = N->getValueType(0);
16035   SDValue V = N->getOperand(0);
16036 
16037   // Extract from UNDEF is UNDEF.
16038   if (V.isUndef())
16039     return DAG.getUNDEF(NVT);
16040 
16041   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
16042     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
16043       return NarrowLoad;
16044 
16045   // Combine:
16046   //    (extract_subvec (concat V1, V2, ...), i)
16047   // Into:
16048   //    Vi if possible
16049   // Only operand 0 is checked as 'concat' assumes all inputs of the same
16050   // type.
16051   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
16052       isa<ConstantSDNode>(N->getOperand(1)) &&
16053       V->getOperand(0).getValueType() == NVT) {
16054     unsigned Idx = N->getConstantOperandVal(1);
16055     unsigned NumElems = NVT.getVectorNumElements();
16056     assert((Idx % NumElems) == 0 &&
16057            "IDX in concat is not a multiple of the result vector length.");
16058     return V->getOperand(Idx / NumElems);
16059   }
16060 
16061   // Skip bitcasting
16062   V = peekThroughBitcast(V);
16063 
16064   // If the input is a build vector. Try to make a smaller build vector.
16065   if (V->getOpcode() == ISD::BUILD_VECTOR) {
16066     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
16067       EVT InVT = V->getValueType(0);
16068       unsigned ExtractSize = NVT.getSizeInBits();
16069       unsigned EltSize = InVT.getScalarSizeInBits();
16070       // Only do this if we won't split any elements.
16071       if (ExtractSize % EltSize == 0) {
16072         unsigned NumElems = ExtractSize / EltSize;
16073         EVT EltVT = InVT.getVectorElementType();
16074         EVT ExtractVT = NumElems == 1 ? EltVT :
16075           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
16076         if ((Level < AfterLegalizeDAG ||
16077              (NumElems == 1 ||
16078               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
16079             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
16080           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
16081                             EltSize;
16082           if (NumElems == 1) {
16083             SDValue Src = V->getOperand(IdxVal);
16084             if (EltVT != Src.getValueType())
16085               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
16086 
16087             return DAG.getBitcast(NVT, Src);
16088           }
16089 
16090           // Extract the pieces from the original build_vector.
16091           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
16092                                             makeArrayRef(V->op_begin() + IdxVal,
16093                                                          NumElems));
16094           return DAG.getBitcast(NVT, BuildVec);
16095         }
16096       }
16097     }
16098   }
16099 
16100   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
16101     // Handle only simple case where vector being inserted and vector
16102     // being extracted are of same size.
16103     EVT SmallVT = V->getOperand(1).getValueType();
16104     if (!NVT.bitsEq(SmallVT))
16105       return SDValue();
16106 
16107     // Only handle cases where both indexes are constants.
16108     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
16109     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
16110 
16111     if (InsIdx && ExtIdx) {
16112       // Combine:
16113       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
16114       // Into:
16115       //    indices are equal or bit offsets are equal => V1
16116       //    otherwise => (extract_subvec V1, ExtIdx)
16117       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
16118           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
16119         return DAG.getBitcast(NVT, V->getOperand(1));
16120       return DAG.getNode(
16121           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
16122           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
16123           N->getOperand(1));
16124     }
16125   }
16126 
16127   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
16128     return NarrowBOp;
16129 
16130   return SDValue();
16131 }
16132 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
// The caller (visitVECTOR_SHUFFLE) guarantees N0 is a CONCAT_VECTORS and N1
// is either undef or a CONCAT_VECTORS with the same operand type.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    // Classify the mask chunk: all-undef, no-undef, or mixed.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The chunk must start at a concat-operand boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and select consecutive elements (an exact subvector copy).
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Map the chunk back to the concat operand it copies, which may come
      // from either N0 or N1.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
16196 
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load.  A BUILD_VECTOR where each
// element is identical is a splat.  A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  // Don't fold if either input has other users; the originals would remain
  // live alongside the new BUILD_VECTOR.
  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  // Gather the scalar source for every mask element; undef mask elements
  // become undef scalars.
  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Mask values < NumElts select from N0, the rest from N1.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
        Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      // Prefer zero-extension when the target says it is free, otherwise
      // sign-extend (truncation cases are handled by the same helpers).
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
16286 
// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
                                            SelectionDAG &DAG,
                                            const TargetLowering &TLI,
                                            bool LegalOperations,
                                            bool LegalTypes) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  ArrayRef<int> Mask = SVN->getMask();
  SDValue N0 = SVN->getOperand(0);

  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
  // Returns true if every defined mask element at a Scale boundary selects
  // the corresponding source element and all other defined elements fail.
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
        continue;
      return false;
    }
    return true;
  };

  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
  // power-of-2 extensions as they are the most likely.
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non power of 2 vector sizes
    if (NumElts % Scale != 0)
      continue;
    if (!isAnyExtend(Scale))
      continue;

    // The extended type has Scale-times wider elements and Scale-times fewer
    // of them, so the overall bit width is unchanged (bitcast-compatible).
    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
    if (!LegalTypes || TLI.isTypeLegal(OutVT))
      if (!LegalOperations ||
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
        return DAG.getBitcast(VT,
                            DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
  }

  return SDValue();
}
16340 
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  SDValue N0 = peekThroughBitcast(SVN->getOperand(0));

  // Only fire when the shuffle input is one of the *_EXTEND_VECTOR_INREG
  // nodes that this truncation can cancel out.
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  // The extension must widen by a whole multiple for the scales to line up.
  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}
16401 
// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
                                     ShuffleVectorSDNode *Splat,
                                     SelectionDAG &DAG) {
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    // Reusing the splat is illegal only when the user selects a defined
    // splat element (SplatMask[UserMask[i]] != -1) into a lane where the
    // splat-shuffle itself is undef (SplatMask[i] == -1).
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
    return SDValue(Splat, 0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}
16450 
16451 /// If the shuffle mask is taking exactly one element from the first vector
16452 /// operand and passing through all other elements from the second vector
16453 /// operand, return the index of the mask element that is choosing an element
16454 /// from the first operand. Otherwise, return -1.
16455 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
16456   int MaskSize = Mask.size();
16457   int EltFromOp0 = -1;
16458   // TODO: This does not match if there are undef elements in the shuffle mask.
16459   // Should we ignore undefs in the shuffle mask instead? The trade-off is
16460   // removing an instruction (a shuffle), but losing the knowledge that some
16461   // vector lanes are not needed.
16462   for (int i = 0; i != MaskSize; ++i) {
16463     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
16464       // We're looking for a shuffle of exactly one element from operand 0.
16465       if (EltFromOp0 != -1)
16466         return -1;
16467       EltFromOp0 = i;
16468     } else if (Mask[i] != i + MaskSize) {
16469       // Nothing from operand 1 can change lanes.
16470       return -1;
16471     }
16472   }
16473   return EltFromOp0;
16474 }
16475 
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insert must write the exact element that the shuffle is reading from
  // operand 0, and the index must be a constant.
  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
                                        Op0.getOperand(2).getValueType());
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}
16528 
16529 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
16530   EVT VT = N->getValueType(0);
16531   unsigned NumElts = VT.getVectorNumElements();
16532 
16533   SDValue N0 = N->getOperand(0);
16534   SDValue N1 = N->getOperand(1);
16535 
16536   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
16537 
16538   // Canonicalize shuffle undef, undef -> undef
16539   if (N0.isUndef() && N1.isUndef())
16540     return DAG.getUNDEF(VT);
16541 
16542   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
16543 
16544   // Canonicalize shuffle v, v -> v, undef
16545   if (N0 == N1) {
16546     SmallVector<int, 8> NewMask;
16547     for (unsigned i = 0; i != NumElts; ++i) {
16548       int Idx = SVN->getMaskElt(i);
16549       if (Idx >= (int)NumElts) Idx -= NumElts;
16550       NewMask.push_back(Idx);
16551     }
16552     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
16553   }
16554 
16555   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
16556   if (N0.isUndef())
16557     return DAG.getCommutedVectorShuffle(*SVN);
16558 
16559   // Remove references to rhs if it is undef
16560   if (N1.isUndef()) {
16561     bool Changed = false;
16562     SmallVector<int, 8> NewMask;
16563     for (unsigned i = 0; i != NumElts; ++i) {
16564       int Idx = SVN->getMaskElt(i);
16565       if (Idx >= (int)NumElts) {
16566         Idx = -1;
16567         Changed = true;
16568       }
16569       NewMask.push_back(Idx);
16570     }
16571     if (Changed)
16572       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
16573   }
16574 
16575   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
16576     return InsElt;
16577 
16578   // A shuffle of a single vector that is a splat can always be folded.
16579   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
16580     if (N1->isUndef() && N0Shuf->isSplat())
16581       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
16582 
16583   // If it is a splat, check if the argument vector is another splat or a
16584   // build_vector.
16585   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
16586     SDNode *V = N0.getNode();
16587 
16588     // If this is a bit convert that changes the element type of the vector but
16589     // not the number of vector elements, look through it.  Be careful not to
16590     // look though conversions that change things like v4f32 to v2f64.
16591     if (V->getOpcode() == ISD::BITCAST) {
16592       SDValue ConvInput = V->getOperand(0);
16593       if (ConvInput.getValueType().isVector() &&
16594           ConvInput.getValueType().getVectorNumElements() == NumElts)
16595         V = ConvInput.getNode();
16596     }
16597 
16598     if (V->getOpcode() == ISD::BUILD_VECTOR) {
16599       assert(V->getNumOperands() == NumElts &&
16600              "BUILD_VECTOR has wrong number of operands");
16601       SDValue Base;
16602       bool AllSame = true;
16603       for (unsigned i = 0; i != NumElts; ++i) {
16604         if (!V->getOperand(i).isUndef()) {
16605           Base = V->getOperand(i);
16606           break;
16607         }
16608       }
16609       // Splat of <u, u, u, u>, return <u, u, u, u>
16610       if (!Base.getNode())
16611         return N0;
16612       for (unsigned i = 0; i != NumElts; ++i) {
16613         if (V->getOperand(i) != Base) {
16614           AllSame = false;
16615           break;
16616         }
16617       }
16618       // Splat of <x, x, x, x>, return <x, x, x, x>
16619       if (AllSame)
16620         return N0;
16621 
16622       // Canonicalize any other splat as a build_vector.
16623       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
16624       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
16625       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
16626 
16627       // We may have jumped through bitcasts, so the type of the
16628       // BUILD_VECTOR may not match the type of the shuffle.
16629       if (V->getValueType(0) != VT)
16630         NewBV = DAG.getBitcast(VT, NewBV);
16631       return NewBV;
16632     }
16633   }
16634 
16635   // Simplify source operands based on shuffle mask.
16636   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16637     return SDValue(N, 0);
16638 
16639   // Match shuffles that can be converted to any_vector_extend_in_reg.
16640   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
16641     return V;
16642 
16643   // Combine "truncate_vector_in_reg" style shuffles.
16644   if (SDValue V = combineTruncationShuffle(SVN, DAG))
16645     return V;
16646 
16647   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
16648       Level < AfterLegalizeVectorOps &&
16649       (N1.isUndef() ||
16650       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
16651        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
16652     if (SDValue V = partitionShuffleOfConcats(N, DAG))
16653       return V;
16654   }
16655 
16656   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16657   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16658   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16659     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
16660       return Res;
16661 
16662   // If this shuffle only has a single input that is a bitcasted shuffle,
16663   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
16664   // back to their original types.
16665   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16666       N1.isUndef() && Level < AfterLegalizeVectorOps &&
16667       TLI.isTypeLegal(VT)) {
16668 
16669     // Peek through the bitcast only if there is one user.
16670     SDValue BC0 = N0;
16671     while (BC0.getOpcode() == ISD::BITCAST) {
16672       if (!BC0.hasOneUse())
16673         break;
16674       BC0 = BC0.getOperand(0);
16675     }
16676 
16677     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
16678       if (Scale == 1)
16679         return SmallVector<int, 8>(Mask.begin(), Mask.end());
16680 
16681       SmallVector<int, 8> NewMask;
16682       for (int M : Mask)
16683         for (int s = 0; s != Scale; ++s)
16684           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
16685       return NewMask;
16686     };
16687 
16688     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
16689       EVT SVT = VT.getScalarType();
16690       EVT InnerVT = BC0->getValueType(0);
16691       EVT InnerSVT = InnerVT.getScalarType();
16692 
16693       // Determine which shuffle works with the smaller scalar type.
16694       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
16695       EVT ScaleSVT = ScaleVT.getScalarType();
16696 
16697       if (TLI.isTypeLegal(ScaleVT) &&
16698           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
16699           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
16700         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
16701         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
16702 
16703         // Scale the shuffle masks to the smaller scalar type.
16704         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
16705         SmallVector<int, 8> InnerMask =
16706             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
16707         SmallVector<int, 8> OuterMask =
16708             ScaleShuffleMask(SVN->getMask(), OuterScale);
16709 
16710         // Merge the shuffle masks.
16711         SmallVector<int, 8> NewMask;
16712         for (int M : OuterMask)
16713           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
16714 
16715         // Test for shuffle mask legality over both commutations.
16716         SDValue SV0 = BC0->getOperand(0);
16717         SDValue SV1 = BC0->getOperand(1);
16718         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
16719         if (!LegalMask) {
16720           std::swap(SV0, SV1);
16721           ShuffleVectorSDNode::commuteMask(NewMask);
16722           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
16723         }
16724 
16725         if (LegalMask) {
16726           SV0 = DAG.getBitcast(ScaleVT, SV0);
16727           SV1 = DAG.getBitcast(ScaleVT, SV1);
16728           return DAG.getBitcast(
16729               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
16730         }
16731       }
16732     }
16733   }
16734 
16735   // Canonicalize shuffles according to rules:
16736   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
16737   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
16738   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
16739   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
16740       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
16741       TLI.isTypeLegal(VT)) {
16742     // The incoming shuffle must be of the same type as the result of the
16743     // current shuffle.
16744     assert(N1->getOperand(0).getValueType() == VT &&
16745            "Shuffle types don't match");
16746 
16747     SDValue SV0 = N1->getOperand(0);
16748     SDValue SV1 = N1->getOperand(1);
16749     bool HasSameOp0 = N0 == SV0;
16750     bool IsSV1Undef = SV1.isUndef();
16751     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
16752       // Commute the operands of this shuffle so that next rule
16753       // will trigger.
16754       return DAG.getCommutedVectorShuffle(*SVN);
16755   }
16756 
16757   // Try to fold according to rules:
16758   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
16759   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
16760   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
16761   // Don't try to fold shuffles with illegal type.
16762   // Only fold if this shuffle is the only user of the other shuffle.
16763   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
16764       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
16765     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
16766 
16767     // Don't try to fold splats; they're likely to simplify somehow, or they
16768     // might be free.
16769     if (OtherSV->isSplat())
16770       return SDValue();
16771 
16772     // The incoming shuffle must be of the same type as the result of the
16773     // current shuffle.
16774     assert(OtherSV->getOperand(0).getValueType() == VT &&
16775            "Shuffle types don't match");
16776 
16777     SDValue SV0, SV1;
16778     SmallVector<int, 4> Mask;
16779     // Compute the combined shuffle mask for a shuffle with SV0 as the first
16780     // operand, and SV1 as the second operand.
16781     for (unsigned i = 0; i != NumElts; ++i) {
16782       int Idx = SVN->getMaskElt(i);
16783       if (Idx < 0) {
16784         // Propagate Undef.
16785         Mask.push_back(Idx);
16786         continue;
16787       }
16788 
16789       SDValue CurrentVec;
16790       if (Idx < (int)NumElts) {
16791         // This shuffle index refers to the inner shuffle N0. Lookup the inner
16792         // shuffle mask to identify which vector is actually referenced.
16793         Idx = OtherSV->getMaskElt(Idx);
16794         if (Idx < 0) {
16795           // Propagate Undef.
16796           Mask.push_back(Idx);
16797           continue;
16798         }
16799 
16800         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
16801                                            : OtherSV->getOperand(1);
16802       } else {
16803         // This shuffle index references an element within N1.
16804         CurrentVec = N1;
16805       }
16806 
16807       // Simple case where 'CurrentVec' is UNDEF.
16808       if (CurrentVec.isUndef()) {
16809         Mask.push_back(-1);
16810         continue;
16811       }
16812 
16813       // Canonicalize the shuffle index. We don't know yet if CurrentVec
16814       // will be the first or second operand of the combined shuffle.
16815       Idx = Idx % NumElts;
16816       if (!SV0.getNode() || SV0 == CurrentVec) {
16817         // Ok. CurrentVec is the left hand side.
16818         // Update the mask accordingly.
16819         SV0 = CurrentVec;
16820         Mask.push_back(Idx);
16821         continue;
16822       }
16823 
16824       // Bail out if we cannot convert the shuffle pair into a single shuffle.
16825       if (SV1.getNode() && SV1 != CurrentVec)
16826         return SDValue();
16827 
16828       // Ok. CurrentVec is the right hand side.
16829       // Update the mask accordingly.
16830       SV1 = CurrentVec;
16831       Mask.push_back(Idx + NumElts);
16832     }
16833 
16834     // Check if all indices in Mask are Undef. In case, propagate Undef.
16835     bool isUndefMask = true;
16836     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
16837       isUndefMask &= Mask[i] < 0;
16838 
16839     if (isUndefMask)
16840       return DAG.getUNDEF(VT);
16841 
16842     if (!SV0.getNode())
16843       SV0 = DAG.getUNDEF(VT);
16844     if (!SV1.getNode())
16845       SV1 = DAG.getUNDEF(VT);
16846 
16847     // Avoid introducing shuffles with illegal mask.
16848     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
16849       ShuffleVectorSDNode::commuteMask(Mask);
16850 
16851       if (!TLI.isShuffleMaskLegal(Mask, VT))
16852         return SDValue();
16853 
16854       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
16855       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
16856       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
16857       std::swap(SV0, SV1);
16858     }
16859 
16860     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
16861     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
16862     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
16863     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
16864   }
16865 
16866   return SDValue();
16867 }
16868 
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    // Only a constant extraction index can be encoded in a shuffle mask.
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Build a mask that moves source element Elt into lane 0 and leaves
      // every other lane undef (-1).
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate (the extracted scalar integer type
      // differs from the result's element type), materialize the truncate
      // here as long as the target element type is legal, and retry the
      // SCALAR_TO_VECTOR on the truncated value.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      // Otherwise form the shuffle directly when element types match, the
      // source vector has at least as many elements as the result, and the
      // target accepts the mask.
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
          TLI.isShuffleMaskLegal(NewMask, VT)) {
        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
                                   DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size this shuffle is a
        // valid result.
        if (VT == InVecT)
          return Val;
        // If not we must truncate the vector: extract the leading subvector
        // with the result's element count starting at index 0.
        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
          EVT SubVT =
              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                               VT.getVectorNumElements());
          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
                            ZeroIdx);
          return Val;
        }
      }
    }
  }

  return SDValue();
}
16919 
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0); // Vector being inserted into.
  SDValue N1 = N->getOperand(1); // Subvector to insert.
  SDValue N2 = N->getOperand(2); // Insertion index.

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
  // us to pull BITCASTs from input to output.
  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract. The extract must come from the same
  // index and have the full result type for this to be a no-op.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N1 and N2 are bitcast values on which insert_subvector
  // would makes sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // Both bitcast inputs must be vectors with matching element types, and
    // the outer vector must keep the same element count for the index N2 to
    // remain valid.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // The remaining folds all require a constant insertion index.
  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  // Only done when Idx1 < Idx0 so insertions end up ordered by index.
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    // Replace the piece covering the insertion index with N1.
    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  return SDValue();
}
17020 
17021 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
17022   SDValue N0 = N->getOperand(0);
17023 
17024   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
17025   if (N0->getOpcode() == ISD::FP16_TO_FP)
17026     return N0->getOperand(0);
17027 
17028   return SDValue();
17029 }
17030 
17031 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
17032   SDValue N0 = N->getOperand(0);
17033 
17034   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
17035   if (N0->getOpcode() == ISD::AND) {
17036     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
17037     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
17038       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
17039                          N0.getOperand(0));
17040     }
17041   }
17042 
17043   return SDValue();
17044 }
17045 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcast(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  // The mask operand must be a constant vector we can inspect element-wise.
  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // A shuffle lane index < NumSubElts selects the LHS element (all-ones
  // mask chunk); an index >= NumSubElts selects a zero (all-zeros chunk).
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      // Undef mask elements may pick anything; mark the lane undef.
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets sub-element 0 holds the most significant bits.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      // Each chunk must be entirely ones (keep LHS) or entirely zeros
      // (clear to zero); anything else cannot be expressed as a shuffle.
      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try the coarsest granularity first, refining toward byte granularity
  // until one split produces a legal clear mask.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
17136 
17137 /// Visit a binary vector operation, like ADD.
17138 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
17139   assert(N->getValueType(0).isVector() &&
17140          "SimplifyVBinOp only works on vectors!");
17141 
17142   SDValue LHS = N->getOperand(0);
17143   SDValue RHS = N->getOperand(1);
17144   SDValue Ops[] = {LHS, RHS};
17145 
17146   // See if we can constant fold the vector operation.
17147   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
17148           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
17149     return Fold;
17150 
17151   // Type legalization might introduce new shuffles in the DAG.
17152   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
17153   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
17154   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
17155       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
17156       LHS.getOperand(1).isUndef() &&
17157       RHS.getOperand(1).isUndef()) {
17158     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
17159     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
17160 
17161     if (SVN0->getMask().equals(SVN1->getMask())) {
17162       EVT VT = N->getValueType(0);
17163       SDValue UndefVector = LHS.getOperand(1);
17164       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
17165                                      LHS.getOperand(0), RHS.getOperand(0),
17166                                      N->getFlags());
17167       AddUsersToWorklist(N);
17168       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
17169                                   SVN0->getMask());
17170     }
17171   }
17172 
17173   return SDValue();
17174 }
17175 
17176 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
17177                                     SDValue N2) {
17178   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
17179 
17180   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
17181                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
17182 
17183   // If we got a simplified select_cc node back from SimplifySelectCC, then
17184   // break it down into a new SETCC node, and a new SELECT node, and then return
17185   // the SELECT node, since we were called with a SELECT node.
17186   if (SCC.getNode()) {
17187     // Check to see if we got a select_cc back (to turn into setcc/select).
17188     // Otherwise, just return whatever node we got back, like fabs.
17189     if (SCC.getOpcode() == ISD::SELECT_CC) {
17190       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
17191                                   N0.getValueType(),
17192                                   SCC.getOperand(0), SCC.getOperand(1),
17193                                   SCC.getOperand(4));
17194       AddToWorklist(SETCC.getNode());
17195       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
17196                            SCC.getOperand(2), SCC.getOperand(3));
17197     }
17198 
17199     return SCC;
17200   }
17201   return SDValue();
17202 }
17203 
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select.  Callers of this
/// should assume that TheSelect is deleted if this returns true.  As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      // Dig the compare operands and condition code out of either a
      // SELECT_CC or the SETCC feeding a SELECT/VSELECT.
      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      // The compare must be (x <lt> 0.0) with the same x as the fsqrt input;
      // any ordered/unordered/signed less-than variant qualifies.
      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // The select we are about to build on the addresses must be legal.
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // Both compare operands could reach the loads; check each.
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    // The merged load may only keep flags both originals satisfy.
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      // If one side is anyext, use the other side's (more constrained)
      // extension kind for the merged load.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
17356 
17357 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
17358 /// bitwise 'and'.
17359 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
17360                                             SDValue N1, SDValue N2, SDValue N3,
17361                                             ISD::CondCode CC) {
17362   // If this is a select where the false operand is zero and the compare is a
17363   // check of the sign bit, see if we can perform the "gzip trick":
17364   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
17365   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
17366   EVT XType = N0.getValueType();
17367   EVT AType = N2.getValueType();
17368   if (!isNullConstant(N3) || !XType.bitsGE(AType))
17369     return SDValue();
17370 
17371   // If the comparison is testing for a positive value, we have to invert
17372   // the sign bit mask, so only do that transform if the target has a bitwise
17373   // 'and not' instruction (the invert is free).
17374   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
17375     // (X > -1) ? A : 0
17376     // (X >  0) ? X : 0 <-- This is canonical signed max.
17377     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
17378       return SDValue();
17379   } else if (CC == ISD::SETLT) {
17380     // (X <  0) ? A : 0
17381     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
17382     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
17383       return SDValue();
17384   } else {
17385     return SDValue();
17386   }
17387 
17388   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
17389   // constant.
17390   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
17391   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
17392   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
17393     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
17394     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
17395     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
17396     AddToWorklist(Shift.getNode());
17397 
17398     if (XType.bitsGT(AType)) {
17399       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17400       AddToWorklist(Shift.getNode());
17401     }
17402 
17403     if (CC == ISD::SETGT)
17404       Shift = DAG.getNOT(DL, Shift, AType);
17405 
17406     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17407   }
17408 
17409   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
17410   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
17411   AddToWorklist(Shift.getNode());
17412 
17413   if (XType.bitsGT(AType)) {
17414     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17415     AddToWorklist(Shift.getNode());
17416   }
17417 
17418   if (CC == ISD::SETGT)
17419     Shift = DAG.getNOT(DL, Shift, AType);
17420 
17421   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17422 }
17423 
17424 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
17425 /// where 'cond' is the comparison specified by CC.
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
///
/// \param NotExtCompare if true, do not turn the select into a zero-extended
///        setcc result (callers set this when they would rather keep the
///        select form).
/// \returns the simplified value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorklist(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    // fold select_cc true, x, y -> x
    // fold select_cc false, x, y -> y
    return !SCCC->isNullValue() ? N2 : N3;
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types an we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
               TargetLowering::Legal &&
           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Element 0 is the false value, element 1 the true value, so the
        // select below picks offset EltSize for "true" and 0 for "false".
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const DataLayout &TD = DAG.getDataLayout();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx =
            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0, DL);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));

        // Re-materialize the condition as a setcc and select the offset,
        // then load the chosen element from the constant pool.
        SDValue Cond = DAG.getSetCC(DL,
                                    getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorklist(Cond.getNode());
        SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                          Cond, One, Zero);
        AddToWorklist(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                            CstOffset);
        AddToWorklist(CPIdx.getNode());
        return DAG.getLoad(
            TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
            Alignment);
      }
    }

  // Try the "gzip trick" (sign-bit test folded to shift + and).
  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y is has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // Only valid when a true condition materializes as the integer 1
  // (ZeroOrOneBooleanContent), so zext(setcc) << log2(N2C) gives N2C or 0.
  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType()) ==
          TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    // Get a SetCC of the condition
    // NOTE: Don't create a SETCC if it's not legal on this target.
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
      SDValue Temp, SCC;
      // cast from setcc result type to select result type
      if (LegalTypes) {
        SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
                            N0, N1, CC);
        // If the setcc type is wider than the select type, mask the result
        // down in-register instead of zero-extending.
        if (N2.getValueType().bitsLT(SCC.getValueType()))
          Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
                                        N2.getValueType());
        else
          Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                             N2.getValueType(), SCC);
      } else {
        // Before type legalization an i1 setcc is always usable.
        SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
                           N2.getValueType(), SCC);
      }

      AddToWorklist(SCC.getNode());
      AddToWorklist(Temp.getNode());

      if (N2C->isOne())
        return Temp;

      // shl setcc result by log2 n2c
      return DAG.getNode(
          ISD::SHL, DL, N2.getValueType(), Temp,
          DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
                          getShiftAmountTy(Temp.getValueType())));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    ConstantSDNode *SubC = nullptr;
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    // The SUB operand must be the constant zero, i.e. the select arm really
    // is a negation of X.
    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      SDLoc DL(N0);
      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, DL,
                                XType, N0, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
17648 
17649 /// This is a stub for TargetLowering::SimplifySetCC.
17650 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
17651                                    ISD::CondCode Cond, const SDLoc &DL,
17652                                    bool foldBooleans) {
17653   TargetLowering::DAGCombinerInfo
17654     DagCombineInfo(DAG, Level, false, this);
17655   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
17656 }
17657 
17658 /// Given an ISD::SDIV node expressing a divide by constant, return
17659 /// a DAG expression to select that will generate the same value by multiplying
17660 /// by a magic number.
17661 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17662 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
17663   // when optimising for minimum size, we don't want to expand a div to a mul
17664   // and a shift.
17665   if (DAG.getMachineFunction().getFunction().optForMinSize())
17666     return SDValue();
17667 
17668   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17669   if (!C)
17670     return SDValue();
17671 
17672   // Avoid division by zero.
17673   if (C->isNullValue())
17674     return SDValue();
17675 
17676   std::vector<SDNode *> Built;
17677   SDValue S =
17678       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17679 
17680   for (SDNode *N : Built)
17681     AddToWorklist(N);
17682   return S;
17683 }
17684 
17685 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
17686 /// DAG expression that will generate the same value by right shifting.
17687 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
17688   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17689   if (!C)
17690     return SDValue();
17691 
17692   // Avoid division by zero.
17693   if (C->isNullValue())
17694     return SDValue();
17695 
17696   std::vector<SDNode *> Built;
17697   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
17698 
17699   for (SDNode *N : Built)
17700     AddToWorklist(N);
17701   return S;
17702 }
17703 
17704 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
17705 /// expression that will generate the same value by multiplying by a magic
17706 /// number.
17707 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
17708 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
17709   // when optimising for minimum size, we don't want to expand a div to a mul
17710   // and a shift.
17711   if (DAG.getMachineFunction().getFunction().optForMinSize())
17712     return SDValue();
17713 
17714   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
17715   if (!C)
17716     return SDValue();
17717 
17718   // Avoid division by zero.
17719   if (C->isNullValue())
17720     return SDValue();
17721 
17722   std::vector<SDNode *> Built;
17723   SDValue S =
17724       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
17725 
17726   for (SDNode *N : Built)
17727     AddToWorklist(N);
17728   return S;
17729 }
17730 
17731 /// Determines the LogBase2 value for a non-null input value using the
17732 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
17733 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
17734   EVT VT = V.getValueType();
17735   unsigned EltBits = VT.getScalarSizeInBits();
17736   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
17737   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
17738   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
17739   return LogBase2;
17740 }
17741 
17742 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17743 /// For the reciprocal, we need to find the zero of the function:
17744 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
17745 ///     =>
17746 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
17747 ///     does not require additional intermediate precision]
17748 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
17749   if (Level >= AfterLegalizeDAG)
17750     return SDValue();
17751 
17752   // TODO: Handle half and/or extended types?
17753   EVT VT = Op.getValueType();
17754   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
17755     return SDValue();
17756 
17757   // If estimates are explicitly disabled for this function, we're done.
17758   MachineFunction &MF = DAG.getMachineFunction();
17759   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
17760   if (Enabled == TLI.ReciprocalEstimate::Disabled)
17761     return SDValue();
17762 
17763   // Estimates may be explicitly enabled for this type with a custom number of
17764   // refinement steps.
17765   int Iterations = TLI.getDivRefinementSteps(VT, MF);
17766   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
17767     AddToWorklist(Est.getNode());
17768 
17769     if (Iterations) {
17770       EVT VT = Op.getValueType();
17771       SDLoc DL(Op);
17772       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17773 
17774       // Newton iterations: Est = Est + Est (1 - Arg * Est)
17775       for (int i = 0; i < Iterations; ++i) {
17776         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
17777         AddToWorklist(NewEst.getNode());
17778 
17779         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
17780         AddToWorklist(NewEst.getNode());
17781 
17782         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17783         AddToWorklist(NewEst.getNode());
17784 
17785         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
17786         AddToWorklist(Est.getNode());
17787       }
17788     }
17789     return Est;
17790   }
17791 
17792   return SDValue();
17793 }
17794 
17795 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17796 /// For the reciprocal sqrt, we need to find the zero of the function:
17797 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17798 ///     =>
17799 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
17800 /// As a result, we precompute A/2 prior to the iteration loop.
17801 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
17802                                          unsigned Iterations,
17803                                          SDNodeFlags Flags, bool Reciprocal) {
17804   EVT VT = Arg.getValueType();
17805   SDLoc DL(Arg);
17806   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
17807 
17808   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
17809   // this entire sequence requires only one FP constant.
17810   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
17811   AddToWorklist(HalfArg.getNode());
17812 
17813   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
17814   AddToWorklist(HalfArg.getNode());
17815 
17816   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
17817   for (unsigned i = 0; i < Iterations; ++i) {
17818     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
17819     AddToWorklist(NewEst.getNode());
17820 
17821     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
17822     AddToWorklist(NewEst.getNode());
17823 
17824     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
17825     AddToWorklist(NewEst.getNode());
17826 
17827     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
17828     AddToWorklist(Est.getNode());
17829   }
17830 
17831   // If non-reciprocal square root is requested, multiply the result by Arg.
17832   if (!Reciprocal) {
17833     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
17834     AddToWorklist(Est.getNode());
17835   }
17836 
17837   return Est;
17838 }
17839 
17840 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
17841 /// For the reciprocal sqrt, we need to find the zero of the function:
17842 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
17843 ///     =>
17844 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
17845 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
17846                                          unsigned Iterations,
17847                                          SDNodeFlags Flags, bool Reciprocal) {
17848   EVT VT = Arg.getValueType();
17849   SDLoc DL(Arg);
17850   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
17851   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
17852 
17853   // This routine must enter the loop below to work correctly
17854   // when (Reciprocal == false).
17855   assert(Iterations > 0);
17856 
17857   // Newton iterations for reciprocal square root:
17858   // E = (E * -0.5) * ((A * E) * E + -3.0)
17859   for (unsigned i = 0; i < Iterations; ++i) {
17860     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
17861     AddToWorklist(AE.getNode());
17862 
17863     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
17864     AddToWorklist(AEE.getNode());
17865 
17866     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
17867     AddToWorklist(RHS.getNode());
17868 
17869     // When calculating a square root at the last iteration build:
17870     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
17871     // (notice a common subexpression)
17872     SDValue LHS;
17873     if (Reciprocal || (i + 1) < Iterations) {
17874       // RSQRT: LHS = (E * -0.5)
17875       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
17876     } else {
17877       // SQRT: LHS = (A * E) * -0.5
17878       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
17879     }
17880     AddToWorklist(LHS.getNode());
17881 
17882     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
17883     AddToWorklist(Est.getNode());
17884   }
17885 
17886   return Est;
17887 }
17888 
17889 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
17890 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
17891 /// Op can be zero.
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
///
/// \param Op         the value to take the (reciprocal) square root of.
/// \param Flags      fast-math flags to propagate to the generated FP nodes.
/// \param Reciprocal true for rsqrt(Op), false for sqrt(Op).
/// \returns the refined estimate, or an empty SDValue if estimates are
///          unavailable or disabled for this type/function.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target chooses between the one-constant and two-constant
  // Newton-Raphson refinement sequences via UseOneConstNR.
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        EVT VT = Op.getValueType();
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // With IEEE denormal handling, denormal inputs can also produce a
          // wrong estimate, so widen the zero check to all values below the
          // smallest normalized magnitude:
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // Otherwise only exact zero needs fixing up:
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}
17959 
17960 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17961   return buildSqrtEstimateImpl(Op, Flags, true);
17962 }
17963 
17964 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
17965   return buildSqrtEstimateImpl(Op, Flags, false);
17966 }
17967 
17968 /// Return true if there is any possibility that the two addresses overlap.
17969 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
17970   // If they are the same then they must be aliases.
17971   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
17972 
17973   // If they are both volatile then they cannot be reordered.
17974   if (Op0->isVolatile() && Op1->isVolatile()) return true;
17975 
17976   // If one operation reads from invariant memory, and the other may store, they
17977   // cannot alias. These should really be checking the equivalent of mayWrite,
17978   // but it only matters for memory nodes other than load /store.
17979   if (Op0->isInvariant() && Op1->writeMem())
17980     return false;
17981 
17982   if (Op1->isInvariant() && Op0->writeMem())
17983     return false;
17984 
17985   unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
17986   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
17987 
17988   // Check for BaseIndexOffset matching.
17989   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
17990   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
17991   int64_t PtrDiff;
17992   if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
17993     if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
17994       return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
17995 
17996     // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
17997     // able to calculate their relative offset if at least one arises
17998     // from an alloca. However, these allocas cannot overlap and we
17999     // can infer there is no alias.
18000     if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
18001       if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
18002         MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18003         // If the base are the same frame index but the we couldn't find a
18004         // constant offset, (indices are different) be conservative.
18005         if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
18006                        !MFI.isFixedObjectIndex(B->getIndex())))
18007           return false;
18008       }
18009 
18010     bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
18011     bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
18012     bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
18013     bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
18014     bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
18015     bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
18016 
18017     // If of mismatched base types or checkable indices we can check
18018     // they do not alias.
18019     if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
18020          (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
18021         (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
18022       return false;
18023   }
18024 
18025   // If we know required SrcValue1 and SrcValue2 have relatively large
18026   // alignment compared to the size and offset of the access, we may be able
18027   // to prove they do not alias. This check is conservative for now to catch
18028   // cases created by splitting vector types.
18029   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
18030   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
18031   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
18032   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
18033   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
18034       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
18035     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
18036     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
18037 
18038     // There is no overlap between these relatively aligned accesses of
18039     // similar size. Return no alias.
18040     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
18041         (OffAlign1 + NumBytes1) <= OffAlign0)
18042       return false;
18043   }
18044 
18045   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
18046                    ? CombinerGlobalAA
18047                    : DAG.getSubtarget().useAA();
18048 #ifndef NDEBUG
18049   if (CombinerAAOnlyFunc.getNumOccurrences() &&
18050       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
18051     UseAA = false;
18052 #endif
18053 
18054   if (UseAA && AA &&
18055       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
18056     // Use alias analysis information.
18057     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
18058     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
18059     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
18060     AliasResult AAResult =
18061         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
18062                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
18063                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
18064                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
18065     if (AAResult == NoAlias)
18066       return false;
18067   }
18068 
18069   // Otherwise we have to assume they alias.
18070   return true;
18071 }
18072 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
/// On return, Aliases holds the chain operands that N must still be ordered
/// after; chains of memory operations proven not to alias N are skipped over.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // Two non-volatile loads never need to be ordered against each other, so
  // remember up front whether N is such a load.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    // Note that Depth is a single counter for the whole traversal (not per
    // path), so this bounds the total amount of chain-walking done here. On
    // hitting the limit we conservatively fall back to the original chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // Two non-volatile loads (IsLoad && IsOpLoad) can never alias, so the
      // isAlias query is only made for load/store or store/store pairs.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      // Wide token factors are kept as-is rather than expanded.
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      // Unknown chain producers are conservatively treated as aliasing.
      Aliases.push_back(Chain);
      break;
    }
  }
}
18159 
18160 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
18161 /// (aliasing node.)
18162 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
18163   if (OptLevel == CodeGenOpt::None)
18164     return OldChain;
18165 
18166   // Ops for replacing token factor.
18167   SmallVector<SDValue, 8> Aliases;
18168 
18169   // Accumulate all the aliases to this node.
18170   GatherAllAliases(N, OldChain, Aliases);
18171 
18172   // If no operands then chain to entry token.
18173   if (Aliases.size() == 0)
18174     return DAG.getEntryNode();
18175 
18176   // If a single operand then chain to it.  We don't need to revisit it.
18177   if (Aliases.size() == 1)
18178     return Aliases[0];
18179 
18180   // Construct a custom tailored token factor.
18181   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
18182 }
18183 
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
//
// Returns true iff the chain of St itself was replaced.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  // Chain rewriting is skipped entirely at -O0.
  if (OptLevel == CodeGenOpt::None)
    return false;

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Candidate stores whose chains we will try to improve; St is always one.
  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Volatile or indexed stores cannot be rechained safely.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);

    // Check that the base pointer is the same as the original one.
    if (!BasePtr.equalBaseIndex(Ptr, DAG))
      break;

    // Walk up the chain to find the next store node, ignoring any
    // intermediate loads. Any other kind of node will halt the loop.
    // Index is set to nullptr to terminate the outer loop when the walk
    // hits a node we can't look through.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        Index = nullptr;
        break;
      }
    } // end while
  }

  // At this point, ChainedStores lists all of the Store nodes
  // reachable by iterating up through chain nodes matching the above
  // conditions.  For each such store identified, try to find an
  // earlier chain to attach the store to which won't violate the
  // required ordering.
  bool MadeChangeToSt = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      // Only a change to St itself affects this function's return value.
      if (ChainedStore == St)
        MadeChangeToSt = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChangeToSt;
}
18283 
18284 /// This is the entry point for the file.
18285 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
18286                            CodeGenOpt::Level OptLevel) {
18287   /// This is the main entry point to this class.
18288   DAGCombiner(*this, AA, OptLevel).Run(Level);
18289 }
18290